Esempio n. 1
0
import shutil

logging.basicConfig(level=logging.INFO, format='(%(levelname)s): %(message)s')

if __name__ == "__main__":
    # Command line entry point: import a pipeline definition directory into
    # the portal, optionally copying it to the default pipe location first.
    parser = argparse.ArgumentParser(
        description='Import a pipeline into the portal')
    parser.add_argument('pipe_dir',
                        help='Path directory with pipeline definition files.')
    parser.add_argument('--copy',
                        default=False,
                        action='store_true',
                        help='Copy to default pipe location before import')
    args = parser.parse_args()

    lostconfig = config.LOSTConfig()
    if args.copy:
        fm = AppFileMan(lostconfig)
        pp_path = fm.get_pipe_project_path()
        # normpath removes any number of trailing separators, so the basename
        # is not empty for inputs such as "my_pipe//" (an empty basename would
        # copy the files straight into the pipe project root).
        pipe_name = os.path.basename(os.path.normpath(args.pipe_dir))
        dst_path = os.path.join(pp_path, pipe_name)
        shutil.copytree(args.pipe_dir, dst_path, dirs_exist_ok=True)
    else:
        dst_path = args.pipe_dir
    dbm = access.DBMan(lostconfig)
    try:
        importer = template_import.PipeImporter(dst_path, dbm)
        importer.start_import()
    finally:
        # Close the database session even when the import fails.
        dbm.close_session()
Esempio n. 2
0
class PipeEngine(pipe_model.PipeEngine):
    def __init__(self, dbm, pipe, lostconfig, client, logger_name=''):
        '''Create an engine for a single pipeline.

        :type dbm: lost.db.access.DBMan
        :type pipe: lost.db.model.Pipe
        :type lostconfig: lost.logic.config.LOSTConfig
        :param client: stored as ``self.client``; may be ``None``
        :param logger_name: prefix of the logger name, the class name is
            appended after a dot
        '''
        super().__init__(dbm=dbm, pipe=pipe)
        self.client = client
        self.lostconfig = lostconfig #type: lost.logic.config.LOSTConfig
        self.file_man = AppFileMan(self.lostconfig)
        # Logger is named "<logger_name>.<ClassName>".
        own_name = self.__class__.__name__
        self.logger = logging.getLogger(f'{logger_name}.{own_name}')

    def process_annotask(self, pipe_e):
        '''Update the state of the annotation task attached to ``pipe_e``.

        An IN_PROGRESS or PAUSED task for which
        ``at_man.has_annotation_in_iteration`` reports nothing in the current
        iteration of ``pipe_e`` is set to finished. A PENDING task is switched
        to IN_PROGRESS and saved.

        :param pipe_e: pipe element; its ``idx`` is used to look up the task
        '''
        task = self.dbm.get_anno_task(pipe_element_id=pipe_e.idx)
        if task.state in (state.AnnoTask.IN_PROGRESS, state.AnnoTask.PAUSED):
            has_annos = at_man.has_annotation_in_iteration(
                self.dbm, task.idx, pipe_e.iteration)
            if not has_annos:
                at_man.set_finished(self.dbm, task.idx)
                no_request_msg = \
                    'No Annotations have been requested for AnnoTask {}'\
                    .format(task.idx)
                self.logger.warning(no_request_msg)
                finished_msg = \
                    "%d: AnnoTask has been finished (ID: %d, Name: %s)"\
                    % (self.pipe.idx, task.idx, task.name)
                self.logger.warning(finished_msg)

        # state = finished will be set in annotation tool
        if task.state == state.AnnoTask.PENDING:
            task.state = state.AnnoTask.IN_PROGRESS
            self.dbm.save_obj(task)
            progress_msg = "%d: AnnoTask IN_PROGRESS (ID: %d, Name: %s)"\
                % (self.pipe.idx, task.idx, task.name)
            self.logger.info(progress_msg)

    def __gen_run_cmd(self, program, pipe_e):
        '''Build the shell command that runs the script of ``pipe_e``.

        :param program: executable used to start the script
        :param pipe_e: pipe element; its ``idx`` is passed via ``--idx``
        :return: ``<py3_init> && <program> <script path> --idx <idx>``
        '''
        target = os.path.join(self.lostconfig.app_path, pipe_e.script.path)
        prefix = self.lostconfig.py3_init + " && "
        invocation = " ".join((program, target, "--idx", str(pipe_e.idx)))
        return prefix + invocation

    def make_debug_session(self, pipe_e):
        '''Write a ``debug.sh`` launcher for ``pipe_e`` and record it.

        The launcher contains the run command with ``pudb3`` as program. A
        hint on how to start it and where to edit the script is stored in
        ``pipe_e.debug_session``, saved and logged.

        :param pipe_e: pipe element whose script should be debugged
        '''
        target_dir = self.file_man.create_debug_path(pipe_element=pipe_e)
        launcher = os.path.join(target_dir, 'debug.sh')
        # Build the command before opening the file so a failure here does
        # not leave an empty launcher behind.
        run_cmd = self.__gen_run_cmd('pudb3', pipe_e)
        with open(launcher, 'w') as handle:
            handle.write(run_cmd)
        edit_target = os.path.join(self.lostconfig.app_path,
                                   pipe_e.script.path)
        pipe_e.debug_session = ''.join((
            "For DEBUG start: bash ", launcher,
            "<br>If you want to EDIT go to: ", edit_target,
        ))
        self.dbm.save_obj(pipe_e)
        self.logger.info('Created debug script: {}'.format(launcher))
        self.logger.info(pipe_e.debug_session)

    def __release_loop_iteration(self, pipe_e):
        '''Start the next iteration of the loop element ``pipe_e``.

        Increments the loop's iteration counter and resets every element
        returned by ``get_loop_pes`` to PENDING, bumping its own iteration and
        scheduling it to be visited again.

        :param pipe_e: loop pipe element to release
        '''
        loop = pipe_e.loop
        loop.iteration += 1
        self.logger.info('{}: Run loop with id {} in iteration {}'.format(
            self.pipe.idx, loop.idx, loop.iteration))
        for member in self.get_loop_pes(loop.pe_jump, pipe_e):
            member.iteration += 1
            member.state = state.PipeElement.PENDING
            kind = member.dtype
            if kind == dtype.PipeElement.ANNO_TASK:
                member.anno_task.state = state.AnnoTask.PENDING
            elif kind == dtype.PipeElement.SCRIPT:
                member.progress = 0.0
            elif kind == dtype.PipeElement.LOOP and member is not pipe_e:
                # Loop in loop case: inner loops start over at iteration zero.
                member.loop.iteration = 0
            self.set_to_visit(member)
            self.dbm.add(member)

    def process_loop(self, pipe_e):
        '''Decide whether the loop element ``pipe_e`` runs again or finishes.

        The loop finishes when ``break_loop`` is set or when the iteration
        counter has reached ``max_iteration - 1``. Otherwise the next
        iteration is released. A loop without ``max_iteration`` is released
        every time. The element is added to the session in all cases.

        :param pipe_e: loop pipe element to process
        '''
        loop = pipe_e.loop
        if loop.break_loop:
            pipe_e.state = state.PipeElement.FINISHED
            self.dbm.add(pipe_e)
            self.logger.info('{}: Break loop with id {}'.format(self.pipe.idx,
                                                            loop.idx))
            return
        # Initialise the counter for bounded AND unbounded loops; releasing an
        # iteration increments it and would fail on None.
        if loop.iteration is None:
            loop.iteration = 0
        if loop.max_iteration is None \
                or loop.iteration < loop.max_iteration - 1:
            self.__release_loop_iteration(pipe_e)
        else:
            pipe_e.state = state.PipeElement.FINISHED
            self.logger.info('{}: Loop ({}) terminated. Max iterations = {}'\
                         .format(self.pipe.idx, loop.idx,
                                 loop.max_iteration))
        self.dbm.add(pipe_e)
        
    def select_env_for_script(self, pipe_e):
        '''Select an environment where the script should be executed.

        :param pipe_e: pipe element whose ``script.envs`` (JSON or ``None``)
            lists the environments the script accepts
        :return: ``'celery'`` when no environment is requested, the first
            requested environment that a worker offers, or ``None`` when none
            of the requested environments is available
        '''
        worker_manager = WorkerMan(self.dbm, self.lostconfig)
        raw_envs = pipe_e.script.envs
        if raw_envs is None:
            return 'celery' # Return default queue
        requested = json.loads(raw_envs)
        if len(requested) == 0:
            return 'celery'
        available = worker_manager.get_worker_envs()
        for env_name in requested:
            if env_name in available:
                return env_name
        self.logger.warning(
            'No suitable env to execute script: {}'.format(pipe_e.script.path))
        return None

    def dask_done_callback(self, fut):
        '''Log the outcome of a finished future.

        Logs the done/cancelled flags, then either the result or, when the
        future carries an exception, the exception and its traceback.

        :param fut: future this callback was attached to
        '''
        self.logger.info(f'fut.done: {fut.done()}')
        self.logger.info(f'fut.cancelled: {fut.cancelled()}')
        failure = fut.exception()
        if failure is not None:
            self.logger.info(f'exception:\n{fut.exception()}')
            tb_text = ''.join(traceback.format_tb(fut.traceback()))
            self.logger.error('traceback:\n{}'.format(tb_text))
            return
        self.logger.info(fut.result())

    def _install_extra_packages(self, client, packages):
        '''Run the extra-package install commands through ``client.run``.

        ``gen_extra_install_cmd`` yields a pip and a conda command; each one
        that is not ``None`` is executed and its output is logged.

        :param client: client whose ``run`` method executes the install
        :param packages: package specification handed to
            ``gen_extra_install_cmd``
        '''
        def run_shell(cmd):
            # Imported locally so the function is self-contained when it is
            # handed to client.run.
            import subprocess
            return subprocess.check_output(
                f'{cmd}', stderr=subprocess.STDOUT, shell=True)

        pip_cmd, conda_cmd = gen_extra_install_cmd(packages, self.lostconfig)
        for install_cmd in (pip_cmd, conda_cmd):
            if install_cmd is None:
                continue
            self.logger.info(f'Start install cmd: {install_cmd}')
            self.logger.info(client.run(run_shell, install_cmd))
            self.logger.info(f'Install finished: {install_cmd}')

    def exec_dask_direct(self, client, pipe_e, worker=None):
        '''Submit the script of ``pipe_e`` to ``client`` as a direct call.

        Installs the script's extra packages, lets ``ppp_man.prepare_import``
        determine the import name of the script and submits
        ``exec_utils.exec_dyn_class`` with it. The outcome is reported by
        ``dask_done_callback``.

        :param client: client used to install packages and submit the job
        :param pipe_e: pipe element whose script should run
        :param worker: passed on as ``workers`` to ``client.submit``
        '''
        script = pipe_e.script
        self._install_extra_packages(client, script.extra_packages)
        project_path = self.file_man.get_pipe_project_path(script)
        module_name = ppp_man.prepare_import(
            client, project_path, script.name, self.logger
        )
        future = client.submit(
            exec_utils.exec_dyn_class, pipe_e.idx, module_name,
            workers=worker,
        )
        future.add_done_callback(self.dask_done_callback)

    def start_script(self,pipe_e):
        '''Submit the script of ``pipe_e`` for execution.

        With a client set on the engine, an environment is selected for the
        script and used as worker; nothing is started when no environment
        fits. Without one, the client of the pipe manager's dask session is
        used and no worker is pinned.

        :param pipe_e: script pipe element to start
        '''
        if self.client is None:
            # No client on the engine: try to get one from the dask session
            # of the user who manages the pipe.
            manager = self.dbm.get_user_by_id(self.pipe.manager_id)
            client = ds_man.get_dask_client(manager)
            ds_man.refresh_user_session(manager)
            self.logger.info(
                'Process script with dask client: {}'.format(client))
            self.logger.info('dask_session: {}'.format(ds_man.session))
            worker = None
        else:
            # Workermanagement == static
            worker = self.select_env_for_script(pipe_e)
            if worker is None:
                return
            client = self.client

        if self.lostconfig.script_execution != 'subprocess':
            self.exec_dask_direct(client, pipe_e, worker)
            return
        job = client.submit(
            exec_script_in_subprocess, pipe_e.idx, workers=worker)
        job.add_done_callback(self.dask_done_callback)

    def process_pipe_element(self):
        '''Process pipe elements until ``get_next_element`` returns ``None``.

        * SCRIPT: started when PENDING (and not in SCRIPT_ERROR).
        * ANNO_TASK: when PENDING the task is updated, a notification mail is
          attempted and the element is set IN_PROGRESS; the task itself is
          then processed on every visit.
        * DATASOURCE, VISUALIZATION, DATA_EXPORT: marked FINISHED right away.
        * LOOP: handled by ``process_loop`` followed by a commit.
        '''
        # Element types without any work of their own.
        auto_finished = (
            dtype.PipeElement.DATASOURCE,
            dtype.PipeElement.VISUALIZATION,
            dtype.PipeElement.DATA_EXPORT,
        )
        pipe_e = self.get_next_element()
        while pipe_e is not None:
            if pipe_e.dtype == dtype.PipeElement.SCRIPT:
                if pipe_e.state != state.PipeElement.SCRIPT_ERROR \
                        and pipe_e.state == state.PipeElement.PENDING:
                    self.start_script(pipe_e)
                    self.logger.info('PipeElementID: {} Excuting script: {}'.format(pipe_e.idx, 
                        pipe_e.script.name))
            elif pipe_e.dtype == dtype.PipeElement.ANNO_TASK:
                if pipe_e.state == state.PipeElement.PENDING:
                    update_anno_task(self.dbm, pipe_e.anno_task.idx)
                    try:
                        email.send_annotask_available(self.dbm, pipe_e.anno_task)
                    except Exception:
                        # Mail delivery is best effort: log and carry on.
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        msg = "Could not send Email. \n"
                        msg += traceback.format_exc()
                        self.logger.error(msg)
                    pipe_e.state = state.PipeElement.IN_PROGRESS
                    self.dbm.save_obj(pipe_e)
                self.process_annotask(pipe_e)
            elif pipe_e.dtype in auto_finished:
                pipe_e.state = state.PipeElement.FINISHED
                self.dbm.save_obj(pipe_e)
            elif pipe_e.dtype == dtype.PipeElement.LOOP:
                self.process_loop(pipe_e)
                self.dbm.commit()
            pipe_e = self.get_next_element()

    def refesh_dask_user_session(self):
        '''Refresh the dask session of the pipe manager.

        Does nothing when the engine was created with a client.
        '''
        if self.client is not None:
            return
        manager = self.dbm.get_user_by_id(self.pipe.manager_id)
        ds_man.refresh_user_session(manager)

    def process_pipeline(self):
        '''Process the pipeline once while holding its ``is_locked`` flag.

        Returns immediately when the pipe is already locked. Otherwise the
        pipe is locked, handled according to its state and unlocked again on
        every exit path, including the FINISHED state and errors.

        :raises Exception: when the pipe is in an unknown state; exceptions
            raised while processing are propagated after unlocking
        '''
        p = self.pipe
        # A missing flag (None) counts as unlocked.
        if p.is_locked:
            return
        p.is_locked = True
        self.dbm.save_obj(p)
        try:
            if p.state == state.Pipe.PENDING:
                self.refesh_dask_user_session()
                p.state = state.Pipe.IN_PROGRESS
                self.dbm.save_obj(p)
                self.process_pipe_element()
            elif p.state == state.Pipe.IN_PROGRESS:
                self.refesh_dask_user_session()
                self.process_pipe_element()
            elif p.state == state.Pipe.FINISHED:
                # Nothing to do; the lock is released below.
                pass
            elif p.state == state.Pipe.ERROR:
                self.__report_error(p)
            else:
                raise Exception("Unknown PipeState!")
        finally:
            # Release the lock however processing ended, so the pipe does
            # not stay locked.
            p.is_locked = False
            self.dbm.save_obj(p)

    def get_next_element(self):
        '''Return the next pipe element to process.

        Walks over ``get_to_visit()``. A ``None`` candidate triggers the check
        whether the pipe is finished; if so the pipe is marked FINISHED, a
        notification mail is attempted and ``None`` is returned. Other
        candidates are filtered through ``check_candiate``. The first
        accepted non-loop element is marked visited and returned.

        :return: a pipe element, a waiting loop element when no other element
            is ready, or ``None``
        '''
        pe_wait = None
        for candidate in self.get_to_visit():
            if candidate is None:
                if not self.pipe_finished():
                    continue
                self.pipe.state = state.Pipe.FINISHED
                self.pipe.timestamp_finished = datetime.now()
                self.dbm.save_obj(self.pipe)
                self.logger.info("%d: Task is finished (Name: %s)"%(self.pipe.idx,
                                                                self.pipe.name))
                try:
                    email.send_pipeline_finished(self.pipe)
                except Exception:
                    # Mail delivery is best effort: log and carry on.
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    msg = "Could not send Email. \n"
                    msg += traceback.format_exc()
                    self.logger.error(msg)
                return None
            pe = self.check_candiate(candidate)
            if pe is None:
                continue
            #If there is a loop under candidates, it should be executed as
            #last possible element. Since a loop will set all other elements
            #within the loop to pending when processed. So if the last element
            #before the loop has subsequent elements. These elements would never
            #be executed since the loop would set the last element in the loop
            #to pending.
            if pe.dtype == dtype.PipeElement.LOOP:
                pe_wait = pe
                continue
            self.set_visited(pe)
            return pe
        return pe_wait

    def pipe_finished(self):
        '''Tell whether every final pipe element is in state FINISHED.

        :return: ``True`` when no element of ``get_final_pes()`` is in
            another state (also when there are no final elements)
        '''
        return not any(
            final_pe.state != state.PipeElement.FINISHED
            for final_pe in self.get_final_pes()
        )

    def check_candiate(self, candidate):
        '''Check whether ``candidate`` may be processed.

        :param candidate: pipe element to check
        :return: ``None`` as soon as a previous element is found that is not
            FINISHED, otherwise ``candidate``. A ``None`` entry among the
            previous elements returns a PENDING candidate right away.
        '''
        for predecessor in self.get_prev_pes(candidate):
            if predecessor is None:
                # No predecessor at this position: a pending candidate is
                # accepted without looking at the remaining entries.
                if candidate.state == state.PipeElement.PENDING:
                    return candidate
                continue
            if predecessor.state != state.PipeElement.FINISHED:
                return None
        return candidate

    def __report_error(self, pipe):
        '''Mail every script error of ``pipe`` and mark it as reported.

        An element is marked as reported whether or not the mail could be
        sent; on failure the traceback of the mail error is appended to the
        element's ``error_msg``.

        :param pipe: pipe whose script errors are fetched via its ``idx``
        '''
        for pipe_element in self.dbm.get_script_errors(pipe.idx):
            # Send mail to inform user about script error.
            try:
                email.send_script_error(pipe, pipe_element)
            except Exception:
                # Keep the reason for the failed mail with the element;
                # error_msg may still be unset at this point.
                pipe_element.error_msg = \
                    (pipe_element.error_msg or '') + traceback.format_exc()
            pipe_element.error_reported = True
            self.dbm.add(pipe_element)
            self.dbm.commit()