def consume_events():
    try:
        events = speventdao.get_events(status=spconst.EVENT_STATUS_NEW, limit=20)  # process 20 events at a time (arbitrary)
        if len(events) > 0:
            conn = spdb.connect()  # connect before the try block, so 'conn' is always bound in the finally clause
            try:
                conn.execute('begin immediate')  # transaction begin (full db lock)

                for e in events:
                    process_event(e, conn)

                # check: at this point, no event status should be EVENT_STATUS_NEW anymore
                li = [e for e in events if e.status == spconst.EVENT_STATUS_NEW]
                assert len(li) == 0

                # switch processed events status in DB
                splog.info('SPEVENTT-003', "%i event(s) processed" % len(events))
                speventdao.update_events(events, conn)

                conn.commit()  # transaction end
            finally:
                spdb.disconnect(conn)  # if an exception occurred, the rollback happens here
    except Exception as e:
        with open(spconfig.stacktrace_log_file, "a") as fh:
            traceback.print_exc(file=fh)
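# Hedged sketch, not the real spdb module: for the explicit 'begin immediate'
# used above to work with the stdlib sqlite3 driver, the connection must run in
# autocommit mode (isolation_level=None); otherwise sqlite3's implicit BEGIN
# collides with the explicit one. Minimal connect() under that assumption
# (the 'sp.db' path is a placeholder):
import sqlite3

def connect(db_file='sp.db'):
    conn = sqlite3.connect(db_file)
    conn.isolation_level = None  # autocommit mode: the caller manages transactions explicitly
    return conn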
def disconnect(conn):
    if is_connected(conn):
        conn.close()

    # hack
    #
    # Force the sqlite db file to be group-writable.
    # It should be done with umask when creating the db, but that seems not to work due to a bug.
    #
    # More info:
    #   http://www.mail-archive.com/[email protected]/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    if os.path.exists(spconfig.db_file):
        if not sptools.is_group_writable(spconfig.db_file):
            if sptools.set_file_permission(spconfig.db_file):
                splog.info("SPDATABA-003", "File permissions have been modified ('%s')" % spconfig.db_file)
            else:
                # we come here when the user does not have enough privilege to set file permissions
                splog.info("SPDATABA-004", "Missing privilege to modify file permissions ('%s')" % spconfig.db_file)
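# Hedged sketch of the two sptools helpers assumed above (names kept, bodies
# inferred from the call sites; the real implementations may differ):
import os
import stat

def is_group_writable(path):
    # true when the group-write bit is set on the file
    return bool(os.stat(path).st_mode & stat.S_IWGRP)

def set_file_permission(path):
    # add the group-write bit; return False when lacking privilege (e.g. not the file owner)
    try:
        st = os.stat(path)
        os.chmod(path, st.st_mode | stat.S_IWGRP)
        return True
    except OSError:
        return False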
def start():
    try:
        http_server.serve_forever()
    except KeyboardInterrupt:
        splog.info('SPRPCSRV-001', 'http_server.serve_forever stopped by KeyboardInterrupt')
        http_server.shutdown()
    except SystemExit:
        # normal termination
        splog.info('SPRPCSRV-002', 'http_server.serve_forever stopped by SystemExit')
        http_server.shutdown()
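# Hedged sketch of how http_server might be built (the snippets suggest an
# RPC front-end exposing get_job/job_done to workers; the server type, address
# and port below are all assumptions, not the real sprpcserver module):
from SimpleXMLRPCServer import SimpleXMLRPCServer  # Python 2 module name

http_server = SimpleXMLRPCServer(('localhost', 7440), logRequests=False)  # port is a placeholder
http_server.register_function(get_job)
http_server.register_function(job_done)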
def run(cls, ppt): splog.info("SPPOSTPR-001", "Post-processing task started (%s)" % str(ppt)) cls.start_external_script( ppt ) # currently, we only use fork (support for thread without fork (i.e without external process) will be added if needed) ppt.end_date = sptime.now()
def start_external_script(cls, ppt):
    (status, stdout, stderr) = sputils.get_status_output(ppt.get_command_line(), shell=True)  # blocking call: the thread waits here until the external process completes

    ppt.script_exit_status = status
    if status == 0:
        ppt.status = spconst.JOB_STATUS_DONE
        splog.info("SPPOSTPR-002", "Post-processing task successfully completed (%s)" % str(ppt))
    else:
        ppt.status = spconst.JOB_STATUS_ERROR
        ppt.error_msg = "Error occurred in external script"
        splog.info("SPPOSTPR-004", "Post-processing task completed with error(s) (%s)" % str(ppt))
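# Hedged sketch of sputils.get_status_output (assumed semantics: run a shell
# command and return its exit status plus the captured streams):
import subprocess

def get_status_output(command, shell=True):
    p = subprocess.Popen(command, shell=shell,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()  # blocks until the child exits
    return (p.returncode, stdout, stderr)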
def get_job(job_class=None, pipeline=None, order=None):
    # note that 'job_class' is an alias for 'transition' (it seems a better term from the worker's point of view)
    splog.info("SPPOSTPR-108", "Job request (job_class=%s,pipeline=%s)" % (job_class, pipeline))
    conn = spdb.connect()
    try:
        conn.execute('begin immediate')

        # get job
        ppprun = spppprdao.get_one_waiting_ppprun(job_class, pipeline, order, conn)  # raises an exception if no job is found

        # retrieve job metadata from pipeline definition
        pipeline = spppp.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        folder = pipeline.get_current_state().transition.workdir

        # dataset_pattern resolution (when possible: e.g. for 'merge' it is not possible,
        # as we come from TWO src dirs (i.e. 'output12'), so we need to keep the '*' char)
        #
        # TODO: find an elegant way to manage the /*/ transformation (i.e. to /process/ for
        # the axis_normal case). Maybe move this logic into spppp.py.
        #
        dataset_pattern = ppprun.dataset_pattern.replace('/*/', '/' + folder + '/')

        # prepare the argument to make it easier for the job
        if ppprun.variable == '':
            arg = '%s/%s/' % (spconfig.data_folder, dataset_pattern)
        else:
            arg = '%s/%s/%s/' % (spconfig.data_folder, dataset_pattern, ppprun.variable)

        # notes:
        #   - job_class and transition are the same (transition is from the FSM view, and job_class is from the job consumer view)
        #   - transition must be set on the job, because we need it when inserting into the jobrun table
        job = JOBRun(job_class=ppprun.transition,
                     full_path_variable=arg,  # TODO: rename full_path_variable to a generic name (matching both variable and dataset-only paths)
                     error_msg=None,
                     transition=ppprun.transition,
                     dataset_pattern=dataset_pattern,
                     variable=ppprun.variable,
                     start_date=sptime.now(),
                     ppprun_id=ppprun.ppprun_id)

        # update DB
        ppprun.error_msg = None  # reset values from a previous try, if any
        ppprun.status = spconst.PPPRUN_STATUS_RUNNING
        ppprun.last_mod_date = sptime.now()
        spppprdao.update_ppprun(ppprun, conn)

        conn.commit()
        splog.info("SPPOSTPR-104", "Job started (ppprun_id=%s)" % str(job.ppprun_id))
        return job
    except NoPostProcessingTaskWaitingException:
        return None  # this means there is no more job to process
    finally:
        spdb.disconnect(conn)  # if an exception occurred, the rollback happens here
def get_job(job_class=None, pipeline=None, order=None):
    # note that 'job_class' is an alias for 'transition' (it seems a better term from the worker's point of view)
    splog.info("SPPOSTPR-108", "Job request (job_class=%s,pipeline=%s)" % (job_class, pipeline))
    conn = spdb.connect()
    try:
        conn.execute('begin immediate')

        # get job
        ppprun = spppprdao.get_one_waiting_ppprun(job_class, pipeline, order, conn)  # raises an exception if no job is found

        # retrieve job metadata from pipeline definition
        pipeline = sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        assert pipeline.get_current_state().transition is not None  # the transition of a waiting job is never None

        # make sure the transition from the ppprun table matches the computed transition from the pipeline definition
        # (this should always be the case, except if someone performs incorrect manual modifications in the database)
        assert pipeline.get_current_state().transition.name == ppprun.transition

        generic_args = Bunch(pipeline=ppprun.pipeline,
                             project=ppprun.project,
                             model=ppprun.model,
                             dataset_pattern=ppprun.dataset_pattern,
                             variable=ppprun.variable,
                             data_folder=spconfig.data_folder)

        # notes:
        #   - job_class and transition are the same (transition is from the finite state machine view, and job_class is from the job consumer view)
        #   - transition must be set in the job, because we need it when inserting into the jobrun table
        job = JOBRun(job_class=ppprun.transition,
                     args=pipeline.get_current_state().transition.get_args(generic_args),
                     error_msg=None,
                     transition=ppprun.transition,
                     start_date=sptime.now(),
                     ppprun_id=ppprun.ppprun_id)

        # update DB
        ppprun.error_msg = None  # reset values from a previous try, if any
        ppprun.status = spconst.PPPRUN_STATUS_RUNNING
        ppprun.last_mod_date = sptime.now()
        spppprdao.update_ppprun(ppprun, conn)

        conn.commit()
        splog.info("SPPOSTPR-104", "Job started (ppprun_id=%s)" % str(job.ppprun_id))
        return job
    except NoPostProcessingTaskWaitingException:
        return None  # this means there is no more job to process
    finally:
        spdb.disconnect(conn)  # if an exception occurred, the rollback happens here
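# Hedged sketch of the Bunch helper used above (the usual idiom: attribute
# access over keyword arguments; the real class may differ):
class Bunch(object):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def __str__(self):
        return str(self.__dict__)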
def add_ppprun(pipeline, status, project, model, dataset_pattern, variable, conn):
    if spppprdao.exists_ppprun(PPPRun(pipeline=pipeline, dataset_pattern=dataset_pattern, variable=variable), conn):
        # retrieve pipeline from db
        pppruns = spppprdao.get_pppruns(order='fifo', pipeline=pipeline, dataset_pattern=dataset_pattern, variable=variable, conn=conn)
        if len(pppruns) != 1:
            raise SPException("SPPOSTPR-440", "Incorrect number of runs (number_of_runs=%i,pipeline=%s,dataset_pattern=%s,variable=%s)" % (len(pppruns), pipeline, dataset_pattern, variable))
        else:
            ppprun = pppruns[0]

        # check the existing pipeline state (if the state does not allow us to restart it, we raise PipelineRunningException)
        if pipeline == 'CMIP5_001':
            if ppprun.status == spconst.PPPRUN_STATUS_DONE:
                pass
            else:
                raise PipelineRunningException()
        elif pipeline == 'CMIP5_002':
            if ppprun.status == spconst.PPPRUN_STATUS_DONE:
                pass
            elif ppprun.status == spconst.PPPRUN_STATUS_PAUSE:
                if ppprun.state == 'S1100':
                    # be sure we are at the beginning of the pipe (as 'pausing' is a status that may occur anywhere in the pipeline).
                    # TODO: replace hardcoded S1100 with the pipeline's first state (state names can change in the future).
                    # Note that in this case we update the pipe, but it doesn't change anything as the pipe is already in the right state.
                    pass
                else:
                    raise PipelineRunningException()
            else:
                raise PipelineRunningException()
        else:
            raise SPException('SPPOSTPR-450', 'Unknown pipeline (%s)' % pipeline)

        # retrieve pipeline definition (note that the code below is not reentrant/threadsafe: it works only because execution mode is serial (i.e. non-parallel))
        p = spppp.get_pipeline(pipeline)  # local renamed to 'p' to avoid shadowing the 'pipeline' parameter
        p.reset()
        state = p.get_current_state().source
        transition = p.get_current_state().transition

        # set new values
        ppprun.state = state
        ppprun.transition = transition.name
        ppprun.status = status
        ppprun.error_msg = None
        ppprun.last_mod_date = sptime.now()

        # save
        spppprdao.update_ppprun(ppprun, conn)
        splog.info("SPPOSTPR-202", "Pipeline updated (%s)" % str(ppprun))
    else:
        ppprun = build_ppprun(pipeline, status, project, model, dataset_pattern, variable)
        id_ = spppprdao.add_ppprun(ppprun, conn)  # the autoincrement field is stored in 'id_' (not used for now)
        splog.info('SPPOSTPR-052', 'New pipeline added (%s,%s,%s,%s,%s,%s)' % (pipeline, status, project, model, dataset_pattern, variable))
def trigger_pipeline(ending, dependent_pipeline, trigger_type, conn):
    # 'ending' is an alias for the pipeline which just ended
    li = []

    if trigger_type == spconst.TRIGGER_TYPE_NV2D:
        if all_variable_complete(ending.pipeline, ending.dataset_pattern, conn):
            # all sibling variable pipelines are complete: retrieve the dataset ppprun
            li = spppprdao.get_pppruns(order='fifo', dataset_pattern=ending.dataset_pattern, pipeline=dependent_pipeline, conn=conn)
        else:
            # some variable pipelines are not complete: nothing to do
            pass
    elif trigger_type == spconst.TRIGGER_TYPE_V2V:
        li = spppprdao.get_pppruns(order='fifo', variable=ending.variable, dataset_pattern=ending.dataset_pattern, pipeline=dependent_pipeline, conn=conn)
        if len(li) < 1:
            splog.info("SPPOSTPR-264", "Dep not found (%s,%s,%s)" % (dependent_pipeline, ending.variable, ending.dataset_pattern))
    elif trigger_type == spconst.TRIGGER_TYPE_D2D:
        li = spppprdao.get_pppruns(order='fifo', dataset_pattern=ending.dataset_pattern, pipeline=dependent_pipeline, conn=conn)
        if len(li) < 1:
            splog.info("SPPOSTPR-262", "Dep not found (%s,%s)" % (dependent_pipeline, ending.dataset_pattern))
    elif trigger_type == spconst.TRIGGER_TYPE_D2NV:
        li = spppprdao.get_pppruns(order='fifo', dataset_pattern=ending.dataset_pattern, pipeline=dependent_pipeline, conn=conn)
        if len(li) < 1:
            splog.info("SPPOSTPR-268", "Dep not found (%s,%s)" % (dependent_pipeline, ending.dataset_pattern))
    else:
        splog.info("SPPOSTPR-201", "We shouldn't be here (%s,%s)" % (ending.variable, ending.dataset_pattern))

    for ppprun in li:
        pause_to_waiting(ppprun, conn)
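# Hedged sketch of the two helpers assumed above (semantics inferred from the
# call sites; the real implementations may differ):
def all_variable_complete(pipeline, dataset_pattern, conn):
    # true when every per-variable run of this pipeline/dataset reached 'done'
    runs = spppprdao.get_pppruns(order='fifo', pipeline=pipeline, dataset_pattern=dataset_pattern, conn=conn)
    return all(r.status == spconst.PPPRUN_STATUS_DONE for r in runs)

def pause_to_waiting(ppprun, conn):
    # release a dependent run: 'pause' becomes 'waiting' so a worker can pick it up
    if ppprun.status == spconst.PPPRUN_STATUS_PAUSE:
        ppprun.status = spconst.PPPRUN_STATUS_WAITING
        ppprun.last_mod_date = sptime.now()
        spppprdao.update_ppprun(ppprun, conn)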
def terminate(signum, frame):
    # both imports must be here because of the double-fork (i.e. we can't move them to the top of this file,
    # because the first import must occur in the 'main_loop' func; speventthread too, because speventthread uses splog)
    import splog, speventthread

    splog.info('SPDAEMON-038', "Daemon stopping ...")

    # stop event thread
    if spconfig.config.get('daemon', 'eventthread') == '1':
        speventthread.stop()

    # stop HTTP server
    raise SystemExit()

    # Code below is for a 'while loop' based daemon
def restart_pipeline(ppprun, status, conn):
    # retrieve pipeline definition (note that the code below is not reentrant/threadsafe: it works only because execution mode is serial (i.e. non-parallel))
    p = sppipeline.get_pipeline(ppprun.pipeline)
    p.reset()
    state = p.get_current_state().source
    transition = p.get_current_state().transition

    # set new values
    ppprun.state = state
    ppprun.transition = transition.name
    ppprun.status = status
    ppprun.error_msg = None
    ppprun.last_mod_date = sptime.now()

    # save
    spppprdao.update_ppprun(ppprun, conn)
    splog.info("SPPOSTPR-202", "Pipeline updated (%s)" % str(ppprun))
def add_ppprun(pipeline, status, project, model, dataset_pattern, variable, conn):
    if spppprdao.exists_ppprun(PPPRun(pipeline=pipeline, dataset_pattern=dataset_pattern, variable=variable), conn):
        # retrieve pipeline from db
        pppruns = spppprdao.get_pppruns(order='fifo', pipeline=pipeline, dataset_pattern=dataset_pattern, variable=variable, conn=conn)
        if len(pppruns) != 1:
            raise SPException("SPPOSTPR-440", "Incorrect number of runs (number_of_runs=%i,pipeline=%s,dataset_pattern=%s,variable=%s)" % (len(pppruns), pipeline, dataset_pattern, variable))
        else:
            ppprun = pppruns[0]

        # check the existing pipeline state (if the state does not allow us to restart it, we raise PipelineRunningException).
        # This prevents a reset on a running pipeline. 'waiting' is not accepted, to prevent a race condition
        # (a job starting just while we are here) => TBC.
        if ppprun.status in [spconst.PPPRUN_STATUS_PAUSE, spconst.PPPRUN_STATUS_DONE]:
            restart_pipeline(ppprun, status, conn)
        else:
            raise PipelineRunningException()
    else:
        ppprun = build_ppprun(pipeline, status, project, model, dataset_pattern, variable)
        id_ = spppprdao.add_ppprun(ppprun, conn)  # the autoincrement field is stored in 'id_' (not used for now)
        splog.info('SPPOSTPR-052', 'New pipeline added (%s,%s,%s,%s,%s,%s)' % (pipeline, status, project, model, dataset_pattern, variable))
def get_new_pipeline_status(start_dependency, dependency_type, e, conn):
    if dependency_type == spconst.TRIGGER_TYPE_D2NV:
        # N to 1 (memo: works in reverse with D2NV): we want to find one dataset from N vars
        v_ = ''  # unset variable so as to match the dataset
    elif dependency_type == spconst.TRIGGER_TYPE_D2D:
        # 1 to 1
        v_ = e.variable  # default is ok
    elif dependency_type == spconst.TRIGGER_TYPE_NV2D:
        # 1 to N (memo: works in reverse with NV2D): we want to find N vars from one dataset
        #
        # hack: we force status to PAUSE here.
        #
        # The trigger will happen at the end of the last 'variable' pipeline
        # (in the postprocessing.job_done func). A better approach might be to call
        # the 'all_variable_complete' func here. Also, merging both dependency code paths
        # (in postprocessing.job_done and here) would improve clarity.
        #
        return spconst.PPPRUN_STATUS_PAUSE

    pipeline_dependency = get_pipeline_dependency(start_dependency, e.dataset_pattern, v_, conn)  # retrieve dependency
    if pipeline_dependency is not None:
        splog.info('SPEVENTT-046', "Dependency found in ppprun table (dependency=%s)" % (start_dependency,))

        if pipeline_dependency.status == spconst.PPPRUN_STATUS_DONE:
            splog.info('SPEVENTT-048', "Create with WAITING status as dependent pipeline is done (dependency=%s,dataset_pattern=%s,variable=%s)" % (start_dependency, e.dataset_pattern, e.variable))
            status = spconst.PPPRUN_STATUS_WAITING
        else:
            splog.info('SPEVENTT-010', "Create with PAUSE status as dependent pipeline is not done (dataset_pattern=%s,variable=%s)" % (e.dataset_pattern, e.variable))
            status = spconst.PPPRUN_STATUS_PAUSE
    else:
        splog.info('SPEVENTT-018', "Create with PAUSE status as dependent pipeline doesn't exist (dataset_pattern=%s,variable=%s)" % (e.dataset_pattern, e.variable))
        status = spconst.PPPRUN_STATUS_PAUSE

    return status
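# Hedged sketch of get_pipeline_dependency (assumed semantics: fetch the single
# ppprun matching the dependency, or None when there is no match):
def get_pipeline_dependency(pipeline, dataset_pattern, variable, conn):
    li = spppprdao.get_pppruns(order='fifo', pipeline=pipeline,
                               dataset_pattern=dataset_pattern,
                               variable=variable, conn=conn)
    return li[0] if li else None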
def main_loop():
    # both imports must be here because of the double-fork (speventthread too, because speventthread uses splog)
    import splog, speventthread

    splog.info('SPDAEMON-001', "Daemon starting ...")

    import spdb  # this is to create database objects if not done already (must be done before starting the rpc server)

    # start event thread
    if spconfig.config.get('daemon', 'eventthread') == '1':
        speventthread.start()

    import sprpcserver
    sprpcserver.start()

    # Code below is for a 'while loop' based daemon
    """
    while quit == 0:
        splog.info('SPDAEMON-024', "Daemon running")
        time.sleep(3)
    """

    splog.info('SPDAEMON-034', "Daemon stopped")
def process_event(e, conn):
    # ignore event based on project
    if e.project not in spconst.AUTHORIZED_PROJECT:
        splog.info('SPEVENTT-024', "Event ignored (%s)" % str(e))
        e.status = spconst.EVENT_STATUS_OLD  # mark event as done
        return

    # ignore unknown event
    if e.name not in pipelinedep.event_pipeline_mapping:
        splog.info('SPEVENTT-004', "Ignore event as not declared in spbindings file (%s)" % str(e))
        e.status = spconst.EVENT_STATUS_OLD  # mark event as done
        return

    # retrieve the pipeline which is bound to the event
    pipeline_name, start_status = pipelinedep.event_pipeline_mapping[e.name]

    # manage start dependency
    #
    # this is to access the 'trigger' dict from the 'value' side
    # TODO: replace this with a bidirectional dict. Maybe also add a loop to allow multiple dependencies.
    reverse_trigger = dict((v[0], (k, v[1])) for k, v in pipelinedep.trigger.iteritems())
    if pipeline_name in reverse_trigger:
        splog.info('SPEVENTT-044', "Starting dependency exists for this pipeline in configuration file (new_pipeline=%s,dependency=%s)" % (pipeline_name, reverse_trigger[pipeline_name]))

        # retrieve dependency
        start_dependency = reverse_trigger[pipeline_name][0]
        dependency_type = reverse_trigger[pipeline_name][1]

        start_status = get_new_pipeline_status(start_dependency, dependency_type, e, conn)  # override 'start_status'
    else:
        start_dependency = None

    # main
    create_pipeline(pipeline_name, start_status, e, conn)
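# Hedged illustration of the pipelinedep structures consumed above (shapes
# inferred from usage; the event name is an example only, while CMIP5_001 and
# CMIP5_002 are pipeline names that do appear elsewhere in this code):
event_pipeline_mapping = {
    # event name -> (pipeline to start, initial status)
    'variable_complete_event': ('CMIP5_001', spconst.PPPRUN_STATUS_WAITING),
}
trigger = {
    # ending pipeline -> (dependent pipeline, trigger type)
    'CMIP5_001': ('CMIP5_002', spconst.TRIGGER_TYPE_NV2D),
}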
def stop():
    splog.info('SPEVENTT-002', "Event thread stopping ...")
    stop_event.set()
    event_thread.join()
def start():
    splog.info('SPEVENTT-001', "Event thread starting ...")
    event_thread.start()
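# Hedged sketch of how event_thread and stop_event above are likely wired
# (a daemon thread polling consume_events until stop_event is set; the poll
# interval is a guess):
import threading

stop_event = threading.Event()

def events_loop():
    while not stop_event.is_set():
        consume_events()
        stop_event.wait(10)  # sleep between polls, but wake up early on stop()

event_thread = threading.Thread(target=events_loop)
event_thread.daemon = True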
def job_done(job):
    # note: this method's name does not imply that the job completed successfully; it just means the job ended (with or without error)

    # debug
    splog.info("SPPOSTPR-200", "DEBUG (%s)" % str(job))

    splog.info("SPPOSTPR-210", "Job done (job_class=%s)" % (job.job_class,))

    job.end_date = sptime.now()

    ppprun = spppprdao.get_ppprun(job.ppprun_id)
    assert ppprun.status == spconst.PPPRUN_STATUS_RUNNING

    if job.error == False:
        splog.info("SPPOSTPR-101", "Job completed successfully (ppprun_id=%s)" % str(job.ppprun_id))
        job.status = spconst.JOB_STATUS_DONE

        # compute new state
        pipeline = sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        pipeline.next(job.transition_return_code)  # as the job is done, we move to the next state (the next state always exists at this point, else what was the point of the job?)

        # set DAO to new state
        ppprun.state = pipeline.get_current_state().source

        # retrieve the next transition
        next_transition = pipeline.get_current_state().transition
        if next_transition is not None:
            ppprun.transition = next_transition.name
            ppprun.status = spconst.PPPRUN_STATUS_WAITING
        else:
            # if we are here, it means the pipeline is complete
            ppprun.transition = None
            ppprun.status = spconst.PPPRUN_STATUS_DONE
    elif job.error == True:
        splog.info("SPPOSTPR-102", "Job failed (ppprun_id=%s)" % str(job.ppprun_id))
        job.status = spconst.JOB_STATUS_ERROR
        ppprun.status = spconst.PPPRUN_STATUS_ERROR
        ppprun.error_msg = job.error_msg

    ppprun.last_mod_date = sptime.now()

    # we also store all job attributes in DB in JSON format
    # (some job attributes contain detailed info about the run, so we keep them all as they may be useful for debugging)
    job.runlog = json.dumps(job.__dict__)

    # compute duration
    job.duration = sptime.compute_duration(job.start_date, job.end_date)

    # update DB
    conn = spdb.connect()  # connect before the try block, so 'conn' is always bound in the finally clause
    try:
        spppprdao.update_ppprun(ppprun, conn)
        spjobrdao.add_jobrun(job, conn)

        if ppprun.status == spconst.PPPRUN_STATUS_DONE:
            if ppprun.pipeline in pipelinedep.trigger:
                dependent_pipeline, trigger_type = pipelinedep.trigger[ppprun.pipeline]
                trigger_pipeline(ppprun, dependent_pipeline, trigger_type, conn)

        conn.commit()
    finally:
        spdb.disconnect(conn)  # if an exception occurred, the rollback happens here
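# Hedged sketch of the sptime helpers used throughout (assumptions: textual
# timestamps and a human-readable duration; the real format may differ):
import datetime

TIME_FMT = '%Y-%m-%d %H:%M:%S'

def now():
    return datetime.datetime.now().strftime(TIME_FMT)

def compute_duration(start_date, end_date):
    delta = (datetime.datetime.strptime(end_date, TIME_FMT)
             - datetime.datetime.strptime(start_date, TIME_FMT))
    return str(delta)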
def job_done(job):
    # note: this method's name does not imply that the job completed successfully; it just means the job ended (with or without error)

    # debug
    splog.info("SPPOSTPR-200", "DEBUG (%s)" % str(job))

    splog.info("SPPOSTPR-210", "Job done (job_class=%s)" % (job.job_class,))

    job.end_date = sptime.now()

    ppprun = spppprdao.get_ppprun(job.ppprun_id)
    assert ppprun.status == spconst.PPPRUN_STATUS_RUNNING

    if job.error == False:
        splog.info("SPPOSTPR-101", "Job completed successfully (ppprun_id=%s)" % str(job.ppprun_id))
        job.status = spconst.JOB_STATUS_DONE

        # compute new state
        pipeline = spppp.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        pipeline.next(job.transition_return_code)  # as the job is done, we move to the next state (the next state always exists at this point, else what was the point of the job?)

        # set DAO to new state
        ppprun.state = pipeline.get_current_state().source

        # retrieve the next transition
        next_transition = pipeline.get_current_state().transition
        if next_transition is not None:
            ppprun.transition = next_transition.name
            ppprun.status = spconst.PPPRUN_STATUS_WAITING
        else:
            # if we are here, it means the pipeline is complete
            ppprun.transition = None
            ppprun.status = spconst.PPPRUN_STATUS_DONE
    elif job.error == True:
        splog.info("SPPOSTPR-102", "Job failed (ppprun_id=%s)" % str(job.ppprun_id))
        job.status = spconst.JOB_STATUS_ERROR
        ppprun.status = spconst.PPPRUN_STATUS_ERROR
        ppprun.error_msg = job.error_msg

    ppprun.last_mod_date = sptime.now()

    # we also store all job attributes in DB in JSON format
    # (some job attributes contain detailed info about the run, so we keep them all as they may be useful for debugging)
    job.runlog = json.dumps(job.__dict__)

    # compute duration
    job.duration = sptime.compute_duration(job.start_date, job.end_date)

    # update DB
    conn = spdb.connect()  # connect before the try block, so 'conn' is always bound in the finally clause
    try:
        spppprdao.update_ppprun(ppprun, conn)
        spjobrdao.add_jobrun(job, conn)

        # if all variables are 'done', switch the dataset pipeline from 'pause' to 'waiting'
        if ppprun.pipeline == 'CMIP5_001':  # this block must be executed only at the end of the CMIP5_001 pipeline
            if ppprun.status == spconst.PPPRUN_STATUS_DONE:
                if all_variable_complete(ppprun.dataset_pattern, conn):
                    li = spppprdao.get_pppruns(order='fifo', dataset_pattern=ppprun.dataset_pattern, pipeline='CMIP5_002', conn=conn)
                    if len(li) == 1:
                        dataset_ppprun = li[0]
                        if dataset_ppprun.status == spconst.PPPRUN_STATUS_PAUSE:
                            dataset_ppprun.status = spconst.PPPRUN_STATUS_WAITING
                            dataset_ppprun.last_mod_date = sptime.now()
                            spppprdao.update_ppprun(dataset_ppprun, conn)

        conn.commit()
    finally:
        spdb.disconnect(conn)  # if an exception occurred, the rollback happens here
#!/usr/bin/env python

import splog

splog.info('Foo Bar')