Example #1
def consume_events():
    try:
        events = speventdao.get_events(
            status=spconst.EVENT_STATUS_NEW,
            limit=20)  # process 20 events at a time (arbitrary)
        if len(events) > 0:
            try:
                conn = spdb.connect()
                conn.execute(
                    'begin immediate')  # transaction begin (full db lock)

                for e in events:
                    process_event(e, conn)

                # Check
                # (at this point, all event statuses should be != EVENT_STATUS_NEW)
                li = [
                    e for e in events
                    if e.status in [spconst.EVENT_STATUS_NEW]
                ]
                assert len(li) == 0

                # Switch processed events status in DB
                splog.info('SPEVENTT-003',
                           "%i event(s) processed" % len(events))
                speventdao.update_events(events, conn)

                conn.commit()  # transaction end
            finally:
                spdb.disconnect(
                    conn)  # if an exception occurs, we do the rollback here

    except Exception, e:
        traceback.print_exc(file=open(spconfig.stacktrace_log_file, "a"))
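
The snippet above relies on SQLite's 'begin immediate' statement to take the write lock for the whole batch, and on the disconnect() helper to roll back on failure. A minimal, self-contained sketch of that transaction pattern, using a plain sqlite3 connection instead of the project's spdb wrapper and a hypothetical 'event' table, could look like this:

import sqlite3

def process_batch(db_file, event_ids):
    # minimal sketch of the begin-immediate / commit / rollback pattern;
    # the 'event' table and its columns are hypothetical, not from the project schema
    conn = sqlite3.connect(db_file, isolation_level=None)  # explicit transaction control
    try:
        conn.execute('begin immediate')  # take the reserved (write) lock up front
        for event_id in event_ids:
            conn.execute('update event set status=? where event_id=?',
                         ('old', event_id))
        conn.commit()  # transaction end, lock released
    except Exception:
        conn.rollback()  # undo the whole batch on any error
        raise
    finally:
        conn.close()
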
Example #2
def disconnect(conn):
    if is_connected(conn):
        conn.close()

    # hack
    #
    # force sqlite db file to be group writable
    #
    # It should be done with umask when creating the db, but that seems not to work due to a bug.
    #
    # more info
    #   http://www.mail-archive.com/[email protected]/msg59080.html
    #   https://code.djangoproject.com/ticket/19292
    #
    if os.path.exists(spconfig.db_file):
        if not sptools.is_group_writable(spconfig.db_file):
            if sptools.set_file_permission(spconfig.db_file):
                splog.info(
                    "SPDATABA-003",
                    "File permissions have been modified ('%s')" %
                    spconfig.db_file)
            else:
                # we come here when the user does not have enough privilege to set file permissions

                splog.info(
                    "SPDATABA-004",
                    "Missing privilege to modify file permissions ('%s')" %
                    spconfig.db_file)
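
is_group_writable and set_file_permission are not shown in this listing; a plausible minimal version of those helpers using the standard os and stat modules (names and return values are assumed from the call sites above, not taken from sptools) could be:

import os
import stat

def is_group_writable(path):
    # check whether the group write bit is set on the file
    return bool(os.stat(path).st_mode & stat.S_IWGRP)

def set_file_permission(path):
    # try to add the group write bit; return False when the user
    # lacks the privilege to change the file mode
    try:
        os.chmod(path, os.stat(path).st_mode | stat.S_IWGRP)
        return True
    except OSError:
        return False
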
Example #3
def consume_events():
    try:
        events=speventdao.get_events(status=spconst.EVENT_STATUS_NEW,limit=20) # process 20 events at a time (arbitrary)
        if len(events)>0:
            try:
                conn=spdb.connect()
                conn.execute('begin immediate') # transaction begin (full db lock)

                for e in events:
                    process_event(e,conn)

                # Check
                # (at this point, all event statuses should be != EVENT_STATUS_NEW)
                li=[e for e in events if e.status in [spconst.EVENT_STATUS_NEW]]
                assert len(li)==0
                
                # Switch processed events status in DB
                splog.info('SPEVENTT-003',"%i event(s) processed"%len(events))
                speventdao.update_events(events,conn)

                conn.commit() # transaction end
            finally:
                spdb.disconnect(conn) # if an exception occurs, we do the rollback here

    except Exception, e:
        traceback.print_exc(file=open(spconfig.stacktrace_log_file,"a"))
Example #4
def start():
    try:
        http_server.serve_forever()
    except KeyboardInterrupt:
        splog.info('SPRPCSRV-001','http_server.serve_forever stopped by KeyboardInterrupt')
        http_server.shutdown()
    except SystemExit:
        splog.info('SPRPCSRV-002','http_server.serve_forever stopped by SystemExit')
        http_server.shutdown()
Example #5
    def run(cls, ppt):
        splog.info("SPPOSTPR-001",
                   "Post-processing task started (%s)" % str(ppt))

        cls.start_external_script(
            ppt
        )  # currently, we only use fork (support for thread without fork (i.e without external process) will be added if needed)

        ppt.end_date = sptime.now()
Example #6
    def start_external_script(cls,ppt):
        (status,stdout,stderr)=sputils.get_status_output(ppt.get_command_line(),shell=True) # fork is blocking here, so the thread will wait until the external process completes
        ppt.script_exit_status=status
        if status==0:
            ppt.status=spconst.JOB_STATUS_DONE

            splog.info("SPPOSTPR-002","Post-processing task successfully completed (%s)"%str(ppt))
        else:
            ppt.status=spconst.JOB_STATUS_ERROR
            ppt.error_msg="Error occurred in external script"

            splog.info("SPPOSTPR-004","Post-processing task completed with error(s) (%s)"%str(ppt))
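
sputils.get_status_output is used here as a blocking '(status, stdout, stderr)' wrapper around the external command; a minimal sketch with the standard subprocess module (the signature is assumed from the call above, not taken from sputils) might look like this:

import subprocess

def get_status_output(command_line, shell=False):
    # run the command, block until it completes, and return
    # (exit_status, stdout, stderr) as the caller above expects
    p = subprocess.Popen(command_line,
                         shell=shell,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    return (p.returncode, stdout, stderr)
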
Example #7
def get_job(job_class=None,pipeline=None,order=None): # note that 'job_class' is an alias for 'transition' (seems a better term from the worker view).
    splog.info("SPPOSTPR-108","Job request (job_class=%s,pipeline=%s)"%(job_class,pipeline))

    try:
        conn=spdb.connect()
        conn.execute('begin immediate')

        # get job
        ppprun=spppprdao.get_one_waiting_ppprun(job_class,pipeline,order,conn) # raise exception if no job found

        # retrieve job metadata from pipeline definition
        pipeline=spppp.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        folder=pipeline.get_current_state().transition.workdir

        # dataset_pattern resolution (when possible (e.g. for 'merge' it is not possible as we go from TWO src dirs (i.e. 'output12'), so we need to keep the '*' char))
        #
        # TODO: find an elegant way to manage /*/ transformation (i.e. to /process/ for the axis_normal case). Maybe move this logic into spppp.py.
        #
        dataset_pattern=ppprun.dataset_pattern.replace('/*/','/'+folder+'/')

        # prepare argument to make it easier for the job
        if ppprun.variable=='':
            arg='%s/%s/'%(spconfig.data_folder,dataset_pattern)
        else:
            arg='%s/%s/%s/'%(spconfig.data_folder,dataset_pattern,ppprun.variable)

        # notes: 
        #  - job_class and transition are the same (transition is from the FSM view, and job_class is from the job consumer view).
        #  - transition must be set in the job, because we need it when doing the insertion in the jobrun table.
        job=JOBRun(job_class=ppprun.transition,
                full_path_variable=arg, # TODO: rename full_path_variable into generic name (matching both variable and dataset only path)
                error_msg=None,
                transition=ppprun.transition,
                dataset_pattern=dataset_pattern,
                variable=ppprun.variable,
                start_date=sptime.now(),
                ppprun_id=ppprun.ppprun_id)

        # update DB
        ppprun.error_msg=None # we reset values from previous try if any
        ppprun.status=spconst.PPPRUN_STATUS_RUNNING
        ppprun.last_mod_date=sptime.now()

        spppprdao.update_ppprun(ppprun,conn)
        conn.commit()

        splog.info("SPPOSTPR-104","Job started (ppprun_id=%s)"%str(job.ppprun_id))

        return job

    except NoPostProcessingTaskWaitingException, e:
        return None # this means no more job to process
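
The dataset_pattern resolution in this variant only substitutes the '*' path segment with the transition's work directory; a tiny worked example (the pattern and folder are made up for illustration):

dataset_pattern = 'CMIP5/*/tas/v20110427'  # hypothetical pattern, '*' stands for the work directory
folder = 'process'

resolved = dataset_pattern.replace('/*/', '/' + folder + '/')
assert resolved == 'CMIP5/process/tas/v20110427'
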
Example #8
def get_job(job_class=None,pipeline=None,order=None): # note that 'job_class' is an alias for 'transition' (seems a better term from the worker view).
    splog.info("SPPOSTPR-108","Job request (job_class=%s,pipeline=%s)"%(job_class,pipeline))

    try:
        conn=spdb.connect()
        conn.execute('begin immediate')

        # get job
        ppprun=spppprdao.get_one_waiting_ppprun(job_class,pipeline,order,conn) # raise exception if no job found

        # retrieve job metadata from pipeline definition
        pipeline=sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)

        assert pipeline.get_current_state().transition is not None # transition of a waiting job is never None

        # be sure that transition from ppprun table matches computed transition from pipeline definition
        # (should always be the case, except if someone performs incorrect manual modifications in the database)
        assert pipeline.get_current_state().transition.name==ppprun.transition

        generic_args=Bunch(pipeline=ppprun.pipeline,
                           project=ppprun.project,
                           model=ppprun.model,
                           dataset_pattern=ppprun.dataset_pattern,
                           variable=ppprun.variable,
                           data_folder=spconfig.data_folder)

        # notes: 
        #  - job_class and transition are the same (transition is from the finite state machine view, and job_class is from the job consumer view).
        #  - transition must be set in the job, because we need it when doing insertion in jobrun table.
        job=JOBRun(job_class=ppprun.transition,
                args=pipeline.get_current_state().transition.get_args(generic_args),
                error_msg=None,
                transition=ppprun.transition,
                start_date=sptime.now(),
                ppprun_id=ppprun.ppprun_id)

        # update DB
        ppprun.error_msg=None # we reset values from previous try if any
        ppprun.status=spconst.PPPRUN_STATUS_RUNNING
        ppprun.last_mod_date=sptime.now()

        spppprdao.update_ppprun(ppprun,conn)
        conn.commit()

        splog.info("SPPOSTPR-104","Job started (ppprun_id=%s)"%str(job.ppprun_id))

        return job

    except NoPostProcessingTaskWaitingException, e:
        return None # this means no more job to process
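
This variant packs the generic job arguments into a Bunch, a plain attribute container; if the project does not provide one, the classic minimal definition would be:

class Bunch(object):
    # minimal attribute container: Bunch(variable='tas').variable == 'tas'
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def __repr__(self):
        return 'Bunch(%r)' % self.__dict__
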
Example #9
def add_ppprun(pipeline,status,project,model,dataset_pattern,variable,conn):
    if spppprdao.exists_ppprun(PPPRun(pipeline=pipeline,dataset_pattern=dataset_pattern,variable=variable),conn):

        # retrieve pipeline from db
        pppruns=spppprdao.get_pppruns(order='fifo',pipeline=pipeline,dataset_pattern=dataset_pattern,variable=variable,conn=conn)

        if len(pppruns)!=1:
            raise SPException("SPPOSTPR-440","Incorrect number of runs (number_of_runs=%i,pipeline=%s,dataset_pattern=%s,variable=%s)"%(len(pppruns),pipeline,dataset_pattern,variable))
        else:
            ppprun=pppruns[0]

            # check existing pipeline state (if the state does not allow us to restart it, we raise PipelineRunningException)
            if pipeline=='CMIP5_001':
                if ppprun.status==spconst.PPPRUN_STATUS_DONE:
                    pass
                else:
                    raise PipelineRunningException()
            elif pipeline=='CMIP5_002':
                if ppprun.status==spconst.PPPRUN_STATUS_DONE:
                    pass
                elif ppprun.status==spconst.PPPRUN_STATUS_PAUSE:
                    if ppprun.state=='S1100': # be sure we are at the beginning of the pipe (as 'pausing' is a status that may occur anywhere in the pipeline). TODO: replace hardcoded S1100 with the pipeline's first state (as state names can change in the future)
                        # note that in this case, we update the pipe, but it doesn't change anything as the pipe is already in the right state
                        pass
                    else:
                        raise PipelineRunningException()
                else:
                    raise PipelineRunningException()
            else:
                raise SPException('SPPOSTPR-450','Unknown pipeline (%s)'%pipeline)

            # retrieve pipeline definition (note that code below is not reentrant/threadsafe: it works only because execution mode is serial (i.e. non parallel))
            pipeline=spppp.get_pipeline(pipeline)
            pipeline.reset()
            state=pipeline.get_current_state().source
            transition=pipeline.get_current_state().transition

            # set new values
            ppprun.state=state
            ppprun.transition=transition.name
            ppprun.status=status
            ppprun.error_msg=None
            ppprun.last_mod_date=sptime.now()

            # save
            spppprdao.update_ppprun(ppprun,conn)
            splog.info("SPPOSTPR-202","Pipeline updated (%s)"%str(ppprun))
    else:
        ppprun=build_ppprun(pipeline,status,project,model,dataset_pattern,variable)
        id_=spppprdao.add_ppprun(ppprun,conn) # autoincrement field is stored in 'id_'. Not used for now.
        splog.info('SPPOSTPR-052','New pipeline added (%s,%s,%s,%s,%s,%s)'%(pipeline,status,project,model,dataset_pattern,variable))
Example #10
def trigger_pipeline(ending,dependent_pipeline,trigger_type,conn): # 'ending' is an alias for the pipeline which just ends
    li=[]

    if trigger_type==spconst.TRIGGER_TYPE_NV2D:
        if all_variable_complete(ending.pipeline,ending.dataset_pattern,conn):
            # all sibling variable pipelines are complete

            # retrieve dataset ppprun
            li=spppprdao.get_pppruns(order='fifo',dataset_pattern=ending.dataset_pattern,pipeline=dependent_pipeline,conn=conn)
        else:
            # some variable pipelines are not complete

            # nothing to do
            pass
    elif trigger_type==spconst.TRIGGER_TYPE_V2V:
        li=spppprdao.get_pppruns(order='fifo',variable=ending.variable,dataset_pattern=ending.dataset_pattern,pipeline=dependent_pipeline,conn=conn)

        if len(li)<1:
            splog.info("SPPOSTPR-264","Dep not found (%s,%s,%s)"%(dependent_pipeline,ending.variable,ending.dataset_pattern))
    elif trigger_type==spconst.TRIGGER_TYPE_D2D:
        li=spppprdao.get_pppruns(order='fifo',dataset_pattern=ending.dataset_pattern,pipeline=dependent_pipeline,conn=conn)

        if len(li)<1:
            splog.info("SPPOSTPR-262","Dep not found (%s,%s)"%(dependent_pipeline,ending.dataset_pattern))
    elif trigger_type==spconst.TRIGGER_TYPE_D2NV:
        li=spppprdao.get_pppruns(order='fifo',dataset_pattern=ending.dataset_pattern,pipeline=dependent_pipeline,conn=conn)

        if len(li)<1:
            splog.info("SPPOSTPR-268","Dep not found (%s,%s)"%(dependent_pipeline,ending.dataset_pattern))
    else:
        splog.info("SPPOSTPR-201","We shouldn't be here (%s,%s)"%(ending.variable,ending.dataset_pattern))


    for ppprun in li:
        pause_to_waiting(ppprun,conn)
Example #11
def terminate(signum, frame):
    import splog, speventthread # both must be imported here because of the double-fork (i.e. we can't move the imports to the top of this file, because the first import must occur in the 'main_loop' func). speventthread too, because speventthread does use splog.

    splog.info('SPDAEMON-038',"Daemon stopping ...")

    # stop event thread
    if spconfig.config.get('daemon','eventthread')=='1':
        speventthread.stop()

    # stop HTTP server
    raise SystemExit()

    # Code below is for a 'while loop' based daemon
    """
Example #12
def terminate(signum, frame):
    import splog, speventthread  # both must be imported here because of the double-fork (i.e. we can't move the imports to the top of this file, because the first import must occur in the 'main_loop' func). speventthread too, because speventthread does use splog.

    splog.info('SPDAEMON-038', "Daemon stopping ...")

    # stop event thread
    if spconfig.config.get('daemon', 'eventthread') == '1':
        speventthread.stop()

    # stop HTTP server
    raise SystemExit()

    # Code below is for a 'while loop' based daemon
    """
Example #13
def start():
    try:
        http_server.serve_forever()
    except KeyboardInterrupt:
        splog.info('SPRPCSRV-001',
                   'http_server.serve_forever stopped by KeyboardInterrupt')
        http_server.shutdown()
    except SystemExit:
        # normal termination

        splog.info('SPRPCSRV-002',
                   'http_server.serve_forever stopped by SystemExit')

        http_server.shutdown()
Example #14
def trigger_pipeline(
        ending, dependent_pipeline, trigger_type,
        conn):  # 'ending' is an alias for the pipeline which just ends
    li = []

    if trigger_type == spconst.TRIGGER_TYPE_NV2D:
        if all_variable_complete(ending.pipeline, ending.dataset_pattern,
                                 conn):
            # all sibling variable pipelines are complete

            # retrieve dataset ppprun
            li = spppprdao.get_pppruns(order='fifo',
                                       dataset_pattern=ending.dataset_pattern,
                                       pipeline=dependent_pipeline,
                                       conn=conn)
        else:
            # some variable pipelines are not complete

            # nothing to do
            pass
    elif trigger_type == spconst.TRIGGER_TYPE_V2V:
        li = spppprdao.get_pppruns(order='fifo',
                                   variable=ending.variable,
                                   dataset_pattern=ending.dataset_pattern,
                                   pipeline=dependent_pipeline,
                                   conn=conn)

        if len(li) < 1:
            splog.info(
                "SPPOSTPR-264", "Dep not found (%s,%s,%s)" %
                (dependent_pipeline, ending.variable, ending.dataset_pattern))
    elif trigger_type == spconst.TRIGGER_TYPE_D2D:
        li = spppprdao.get_pppruns(order='fifo',
                                   dataset_pattern=ending.dataset_pattern,
                                   pipeline=dependent_pipeline,
                                   conn=conn)

        if len(li) < 1:
            splog.info(
                "SPPOSTPR-262", "Dep not found (%s,%s)" %
                (dependent_pipeline, ending.dataset_pattern))
    elif trigger_type == spconst.TRIGGER_TYPE_D2NV:
        li = spppprdao.get_pppruns(order='fifo',
                                   dataset_pattern=ending.dataset_pattern,
                                   pipeline=dependent_pipeline,
                                   conn=conn)

        if len(li) < 1:
            splog.info(
                "SPPOSTPR-268", "Dep not found (%s,%s)" %
                (dependent_pipeline, ending.dataset_pattern))
    else:
        splog.info(
            "SPPOSTPR-201", "We shouldn't be here (%s,%s)" %
            (ending.variable, ending.dataset_pattern))

    for ppprun in li:
        pause_to_waiting(ppprun, conn)
Example #15
def restart_pipeline(ppprun,status,conn):

    # retrieve pipeline definition (note that code below is not reentrant/threadsafe: it works only because execution mode is serial (i.e. non parallel))
    p=sppipeline.get_pipeline(ppprun.pipeline)
    p.reset()
    state=p.get_current_state().source
    transition=p.get_current_state().transition

    # set new values
    ppprun.state=state
    ppprun.transition=transition.name
    ppprun.status=status
    ppprun.error_msg=None
    ppprun.last_mod_date=sptime.now()

    # save
    spppprdao.update_ppprun(ppprun,conn)
    splog.info("SPPOSTPR-202","Pipeline updated (%s)"%str(ppprun))
Example #16
def restart_pipeline(ppprun, status, conn):

    # retrieve pipeline definition (note that code below is not reentrant/threadsafe: it works only because execution mode is serial (i.e. non parallel))
    p = sppipeline.get_pipeline(ppprun.pipeline)
    p.reset()
    state = p.get_current_state().source
    transition = p.get_current_state().transition

    # set new values
    ppprun.state = state
    ppprun.transition = transition.name
    ppprun.status = status
    ppprun.error_msg = None
    ppprun.last_mod_date = sptime.now()

    # save
    spppprdao.update_ppprun(ppprun, conn)
    splog.info("SPPOSTPR-202", "Pipeline updated (%s)" % str(ppprun))
Example #17
    def start_external_script(cls, ppt):
        (status, stdout, stderr) = sputils.get_status_output(
            ppt.get_command_line(), shell=True
        )  # fork is blocking here, so the thread will wait until the external process completes
        ppt.script_exit_status = status
        if status == 0:
            ppt.status = spconst.JOB_STATUS_DONE

            splog.info(
                "SPPOSTPR-002",
                "Post-processing task successfully completed (%s)" % str(ppt))
        else:
            ppt.status = spconst.JOB_STATUS_ERROR
            ppt.error_msg = "Error occurred in external script"

            splog.info(
                "SPPOSTPR-004",
                "Post-processing task completed with error(s) (%s)" % str(ppt))
Example #18
def add_ppprun(pipeline,status,project,model,dataset_pattern,variable,conn):
    if spppprdao.exists_ppprun(PPPRun(pipeline=pipeline,dataset_pattern=dataset_pattern,variable=variable),conn):

        # retrieve pipeline from db
        pppruns=spppprdao.get_pppruns(order='fifo',pipeline=pipeline,dataset_pattern=dataset_pattern,variable=variable,conn=conn)

        if len(pppruns)!=1:
            raise SPException("SPPOSTPR-440","Incorrect number of runs (number_of_runs=%i,pipeline=%s,dataset_pattern=%s,variable=%s)"%(len(pppruns),pipeline,dataset_pattern,variable))
        else:
            ppprun=pppruns[0]

            if ppprun.status in [spconst.PPPRUN_STATUS_PAUSE,spconst.PPPRUN_STATUS_DONE]: # check existing pipeline state (if the state does not allow us to restart it, we raise PipelineRunningException). This is to prevent a reset on a running pipeline. 'waiting' is not accepted to prevent a race condition (a job starting just while we are here) => TBC.
                restart_pipeline(ppprun,status,conn)
            else:
                raise PipelineRunningException()

    else:
        ppprun=build_ppprun(pipeline,status,project,model,dataset_pattern,variable)
        id_=spppprdao.add_ppprun(ppprun,conn) # autoincrement field is stored in 'id_'. Not used for now.
        splog.info('SPPOSTPR-052','New pipeline added (%s,%s,%s,%s,%s,%s)'%(pipeline,status,project,model,dataset_pattern,variable))
Example #19
def get_new_pipeline_status(start_dependency, dependency_type, e, conn):

    if dependency_type == spconst.TRIGGER_TYPE_D2NV:
        # N to 1 (memo: works in reverse with D2NV). we want to find one dataset from N var.

        v_ = ''  # unset variable so to match the dataset
    elif dependency_type == spconst.TRIGGER_TYPE_D2D:
        # 1 to 1

        v_ = e.variable  # default is ok
    elif dependency_type == spconst.TRIGGER_TYPE_NV2D:
        # 1 to N (memo: works in reverse with NV2D). we want to find N var from one dataset.

        # hack: we force status to PAUSE here.
        #
        # The trigger will happen at the end of the last 'variable' pipeline
        # (in postprocessing.job_done func). A better way maybe is to call
        # 'all_variable_complete' func here. Also maybe merging both dep code
        # (in postprocessing.job_done and here) would improve clarity..
        #
        return spconst.PPPRUN_STATUS_PAUSE

    pipeline_dependency = get_pipeline_dependency(start_dependency,
                                                  e.dataset_pattern, v_,
                                                  conn)  # retrieve dependency
    if pipeline_dependency is not None:
        splog.info(
            'SPEVENTT-046',
            "dependency found in ppprun table (dependency=%s)" %
            (start_dependency, ))
        if pipeline_dependency.status == spconst.PPPRUN_STATUS_DONE:
            splog.info(
                'SPEVENTT-048',
                "Create with WAITING status as dependent pipeline is done (dependency=%s,dataset_pattern=%s,variable=%s)"
                % (start_dependency, e.dataset_pattern, e.variable))
            status = spconst.PPPRUN_STATUS_WAITING
        else:
            splog.info(
                'SPEVENTT-010',
                'Create with PAUSE status as dependent pipeline is not done (dataset_pattern=%s,variable=%s)'
                % (e.dataset_pattern, e.variable))
            status = spconst.PPPRUN_STATUS_PAUSE
    else:
        splog.info(
            'SPEVENTT-018',
            "Create with PAUSE status as dependent pipeline doesn't exist (dataset_pattern=%s,variable=%s)"
            % (e.dataset_pattern, e.variable))
        status = spconst.PPPRUN_STATUS_PAUSE

    return status
Example #20
def main_loop():
    import splog, speventthread  # both must be imported here because of the double-fork (speventthread too, because speventthread does use splog)

    splog.info('SPDAEMON-001', "Daemon starting ...")

    import spdb  # this is to create database objects if not done already (must be done before starting the rpc server)

    # start event thread
    if spconfig.config.get('daemon', 'eventthread') == '1':
        speventthread.start()

    import sprpcserver
    sprpcserver.start()

    # Code below is for a 'while loop' based daemon
    """
    while quit==0:
        splog.info('SPDAEMON-024',"Daemon running")
        time.sleep(3)
    """

    splog.info('SPDAEMON-034', "Daemon stopped")
Example #21
def main_loop():
    import splog, speventthread # both must be imported here because of the double-fork (speventthread too, because speventthread does use splog)

    splog.info('SPDAEMON-001',"Daemon starting ...")

    import spdb # this is to create database objects if not done already (must be done before starting the rpc server)

    # start event thread
    if spconfig.config.get('daemon','eventthread')=='1':
        speventthread.start()

    import sprpcserver
    sprpcserver.start()

    # Code below is for a 'while loop' based daemon
    """
    while quit==0:
        splog.info('SPDAEMON-024',"Daemon running")
        time.sleep(3)
    """

    splog.info('SPDAEMON-034',"Daemon stopped")
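
The comments in main_loop() and terminate() refer to a double-fork daemon, which is why splog and speventthread are imported inside the functions rather than at module level. A much simplified sketch of how the two functions are typically wired together (only a single fork is shown, and any name outside main_loop/terminate is an assumption, not the project's code):

import os
import signal

def run_daemon():
    # simplified sketch: the real daemon forks twice and detaches from the terminal
    pid = os.fork()
    if pid > 0:
        return  # parent returns immediately, the child becomes the daemon
    signal.signal(signal.SIGTERM, terminate)  # SIGTERM lands in terminate(signum, frame)
    signal.signal(signal.SIGINT, terminate)
    main_loop()  # blocks in sprpcserver.start() until terminate() raises SystemExit
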
Example #22
def add_ppprun(pipeline, status, project, model, dataset_pattern, variable,
               conn):
    if spppprdao.exists_ppprun(
            PPPRun(pipeline=pipeline,
                   dataset_pattern=dataset_pattern,
                   variable=variable), conn):

        # retrieve pipeline from db
        pppruns = spppprdao.get_pppruns(order='fifo',
                                        pipeline=pipeline,
                                        dataset_pattern=dataset_pattern,
                                        variable=variable,
                                        conn=conn)

        if len(pppruns) != 1:
            raise SPException(
                "SPPOSTPR-440",
                "Incorrect number of runs (number_of_runs=%i,pipeline=%s,dataset_pattern=%s,variable=%s)"
                % (len(pppruns), pipeline, dataset_pattern, variable))
        else:
            ppprun = pppruns[0]

            if ppprun.status in [
                    spconst.PPPRUN_STATUS_PAUSE, spconst.PPPRUN_STATUS_DONE
            ]:  # check existing pipeline state (if the state does not allow us to restart it, we raise PipelineRunningException). This is to prevent a reset on a running pipeline. 'waiting' is not accepted to prevent a race condition (a job starting just while we are here) => TBC.
                restart_pipeline(ppprun, status, conn)
            else:
                raise PipelineRunningException()

    else:
        ppprun = build_ppprun(pipeline, status, project, model,
                              dataset_pattern, variable)
        id_ = spppprdao.add_ppprun(
            ppprun,
            conn)  # autoincrement field is stored in 'id_'. Not used for now.
        splog.info(
            'SPPOSTPR-052', 'New pipeline added (%s,%s,%s,%s,%s,%s)' %
            (pipeline, status, project, model, dataset_pattern, variable))
Example #23
def process_event(e, conn):

    # ignore event based on project

    if e.project not in spconst.AUTHORIZED_PROJECT:
        splog.info('SPEVENTT-024', "Event ignored (%s)" % str(e))
        e.status = spconst.EVENT_STATUS_OLD  # mark events as done
        return

    # ignore unknown event

    if e.name not in pipelinedep.event_pipeline_mapping:
        splog.info(
            'SPEVENTT-004',
            "Ignore event as not declared in spbindings file (%s)" % str(e))
        e.status = spconst.EVENT_STATUS_OLD  # mark events as done
        return

    # retrieve the pipeline which is bound to the event

    pipeline_name, start_status = pipelinedep.event_pipeline_mapping[e.name]

    # manage start dependency

    # this is to access the 'trigger' dict from the 'value' side
    reverse_trigger = dict(
        (v[0], (k, v[1])) for k, v in pipelinedep.trigger.iteritems()
    )  # TODO: replace this with a bidirectional dict. Maybe also add loop to allow multiple dependencies.

    if pipeline_name in reverse_trigger:
        splog.info(
            'SPEVENTT-044',
            "starting dependency exists for this pipeline in configuration file (new_pipeline=%s,dependency=%s)"
            % (pipeline_name, reverse_trigger[pipeline_name]))

        # retrieve dependency
        start_dependency = reverse_trigger[pipeline_name][0]
        dependency_type = reverse_trigger[pipeline_name][1]

        start_status = get_new_pipeline_status(start_dependency,
                                               dependency_type, e,
                                               conn)  # override 'start_status'
    else:
        start_dependency = None

    # main

    create_pipeline(pipeline_name, start_status, e, conn)
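
The reverse_trigger expression inverts the trigger mapping so that the dependency can be looked up from the dependent pipeline's side; a small worked example with made-up entries (using items() instead of the Python 2 iteritems()):

# trigger maps: ending_pipeline -> (dependent_pipeline, trigger_type)
trigger = {'CMIP5_001': ('CMIP5_002', 'NV2D')}

# reverse_trigger maps: dependent_pipeline -> (ending_pipeline, trigger_type)
reverse_trigger = dict((v[0], (k, v[1])) for k, v in trigger.items())

assert reverse_trigger == {'CMIP5_002': ('CMIP5_001', 'NV2D')}
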
Example #24
def stop():
    splog.info('SPEVENTT-002',"Event thread stopping ...")
    stop_event.set()
    event_thread.join()
Example #25
def start():
    splog.info('SPEVENTT-001',"Event thread starting ...")
    event_thread.start()
Example #26
def stop():
    splog.info('SPEVENTT-002', "Event thread stopping ...")
    stop_event.set()
    event_thread.join()
Example #27
def get_job(
    job_class=None,
    pipeline=None,
    order=None
):  # note that 'job_class' is an alias for 'transition' (seems a better term from the worker view).
    splog.info(
        "SPPOSTPR-108",
        "Job request (job_class=%s,pipeline=%s)" % (job_class, pipeline))

    try:
        conn = spdb.connect()
        conn.execute('begin immediate')

        # get job
        ppprun = spppprdao.get_one_waiting_ppprun(
            job_class, pipeline, order,
            conn)  # raise exception if no job found

        # retrieve job metadata from pipeline definition
        pipeline = sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)

        assert pipeline.get_current_state(
        ).transition is not None  # transition of a waiting job is never None

        # be sure that transition from ppprun table matches computed transition from pipeline definition
        # (should always be the case, except if someone performs incorrect manual modifications in the database)
        assert pipeline.get_current_state(
        ).transition.name == ppprun.transition

        generic_args = Bunch(pipeline=ppprun.pipeline,
                             project=ppprun.project,
                             model=ppprun.model,
                             dataset_pattern=ppprun.dataset_pattern,
                             variable=ppprun.variable,
                             data_folder=spconfig.data_folder)

        # notes:
        #  - job_class and transition are the same (transition is from the finite state machine view, and job_class is from the job consumer view).
        #  - transition must be set in the job, because we need it when doing insertion in jobrun table.
        job = JOBRun(job_class=ppprun.transition,
                     args=pipeline.get_current_state().transition.get_args(
                         generic_args),
                     error_msg=None,
                     transition=ppprun.transition,
                     start_date=sptime.now(),
                     ppprun_id=ppprun.ppprun_id)

        # update DB
        ppprun.error_msg = None  # we reset values from previous try if any
        ppprun.status = spconst.PPPRUN_STATUS_RUNNING
        ppprun.last_mod_date = sptime.now()

        spppprdao.update_ppprun(ppprun, conn)
        conn.commit()

        splog.info("SPPOSTPR-104",
                   "Job started (ppprun_id=%s)" % str(job.ppprun_id))

        return job

    except NoPostProcessingTaskWaitingException, e:
        return None  # this means no more job to process
Example #28
def job_done(
    job
):  # note: this method name does not imply that the job completed successfully. It just means the job ended (with or without error).

    # debug
    splog.info("SPPOSTPR-200", "DEBUG (%s)" % str(job))

    splog.info("SPPOSTPR-210", "Job done (job_class=%s)" % (job.job_class, ))

    job.end_date = sptime.now()

    ppprun = spppprdao.get_ppprun(job.ppprun_id)
    assert ppprun.status == spconst.PPPRUN_STATUS_RUNNING

    if job.error == False:
        splog.info(
            "SPPOSTPR-101",
            "Job completed successfully (ppprun_id=%s)" % str(job.ppprun_id))

        job.status = spconst.JOB_STATUS_DONE

        # compute new state
        pipeline = sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        pipeline.next(
            job.transition_return_code
        )  # as the job is done, we move to the next state (the next state always exists at this point, else what's the point of the job?)

        # set DAO to new state
        ppprun.state = pipeline.get_current_state().source

        # retrieve the next transition
        next_transition = pipeline.get_current_state().transition

        if next_transition is not None:
            ppprun.transition = next_transition.name
            ppprun.status = spconst.PPPRUN_STATUS_WAITING
        else:
            # if we are here, it means that pipeline is complete

            ppprun.transition = None
            ppprun.status = spconst.PPPRUN_STATUS_DONE

    elif job.error == True:
        splog.info("SPPOSTPR-102",
                   "Job failed (ppprun_id=%s)" % str(job.ppprun_id))
        job.status = spconst.JOB_STATUS_ERROR
        ppprun.status = spconst.PPPRUN_STATUS_ERROR
        ppprun.error_msg = job.error_msg

    ppprun.last_mod_date = sptime.now()

    # we also store all job attributes in DB in JSON fmt
    # (some job attributes contain detailed info about the run, so we keep them all as they may be useful for debugging)
    #
    job.runlog = json.dumps(job.__dict__)

    # compute duration
    job.duration = sptime.compute_duration(job.start_date, job.end_date)

    # Update DB
    try:
        conn = spdb.connect()
        spppprdao.update_ppprun(ppprun, conn)
        spjobrdao.add_jobrun(job, conn)

        if ppprun.status == spconst.PPPRUN_STATUS_DONE:
            if ppprun.pipeline in pipelinedep.trigger:
                dependent_pipeline, trigger_type = pipelinedep.trigger[
                    ppprun.pipeline]
                trigger_pipeline(ppprun, dependent_pipeline, trigger_type,
                                 conn)

        conn.commit()
    finally:
        spdb.disconnect(conn)  # if an exception occurs, we do the rollback here
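
job_done drives the pipeline as a small finite state machine: set_current_state() positions it on the stored state, next() follows the transition chosen by the job's return code, and get_current_state() exposes the new source state and its outgoing transition (None once the pipeline is complete). A toy sketch of that interface, with class and attribute names inferred from the calls above rather than taken from the real sppipeline module:

class Transition(object):
    def __init__(self, name, destination):
        self.name = name                # e.g. the job_class executed by the worker
        self.destination = destination  # name of the state reached when the job is done

class State(object):
    def __init__(self, source, transition):
        self.source = source            # state name, e.g. 'S1100'
        self.transition = transition    # outgoing transition, or None for the final state

class Pipeline(object):
    def __init__(self, states, first_state):
        self._states = states           # dict: state name -> State
        self._first_state = first_state
        self._current = states[first_state]

    def reset(self):
        self._current = self._states[self._first_state]

    def set_current_state(self, name):
        self._current = self._states[name]

    def get_current_state(self):
        return self._current

    def next(self, transition_return_code=None):
        # follow the current transition (a real implementation could pick
        # the destination according to the transition return code)
        self._current = self._states[self._current.transition.destination]
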
Example #29
    def run(cls,ppt):
        splog.info("SPPOSTPR-001","Post-processing task started (%s)"%str(ppt))

        cls.start_external_script(ppt) # currently, we only use fork (support for thread without fork (i.e without external process) will be added if needed)

        ppt.end_date=sptime.now()
Example #30
def job_done(job): # note: this method name does not imply that the job completed successfully. It just means the job ended (with or without error).

    # debug
    splog.info("SPPOSTPR-200","DEBUG (%s)"%str(job))

    splog.info("SPPOSTPR-210","Job done (job_class=%s)"%(job.job_class,))

    job.end_date=sptime.now()

    ppprun=spppprdao.get_ppprun(job.ppprun_id)
    assert ppprun.status==spconst.PPPRUN_STATUS_RUNNING

    if job.error==False:
        splog.info("SPPOSTPR-101","Job completed successfully (ppprun_id=%s)"%str(job.ppprun_id))

        job.status=spconst.JOB_STATUS_DONE

        # compute new state
        pipeline=spppp.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        pipeline.next(job.transition_return_code) # as the job is done, we move to the next state (the next state always exists at this point, else what's the point of the job?)

        # set DAO to new state
        ppprun.state=pipeline.get_current_state().source

        # retrieve the next transition
        next_transition=pipeline.get_current_state().transition

        if next_transition is not None:
            ppprun.transition=next_transition.name
            ppprun.status=spconst.PPPRUN_STATUS_WAITING
        else:
            # if we are here, it means that pipeline is complete

            ppprun.transition=None
            ppprun.status=spconst.PPPRUN_STATUS_DONE

    elif job.error==True:
        splog.info("SPPOSTPR-102","Job failed (ppprun_id=%s)"%str(job.ppprun_id))
        job.status=spconst.JOB_STATUS_ERROR
        ppprun.status=spconst.PPPRUN_STATUS_ERROR
        ppprun.error_msg=job.error_msg

    ppprun.last_mod_date=sptime.now()

    # we also store all job attributes in DB in JSON fmt
    # (some job attributes contain detailed info about the run, so we keep them all as they may be useful for debugging)
    #
    job.runlog=json.dumps(job.__dict__)

    # compute duration
    job.duration=sptime.compute_duration(job.start_date,job.end_date)

    # Update DB
    try:
        conn=spdb.connect()
        spppprdao.update_ppprun(ppprun,conn)
        spjobrdao.add_jobrun(job,conn)


        # if all variable 'done', switch dataset pipeline from 'pause' to 'waiting'
        if ppprun.pipeline=='CMIP5_001': # this block must be executed only at the end of CMIP5_001 pipeline
            if ppprun.status==spconst.PPPRUN_STATUS_DONE:
                if all_variable_complete(ppprun.dataset_pattern,conn):
                    li=spppprdao.get_pppruns(order='fifo',dataset_pattern=ppprun.dataset_pattern,pipeline='CMIP5_002',conn=conn)
                    if len(li)==1:
                        dataset_ppprun=li[0]
                        if dataset_ppprun.status==spconst.PPPRUN_STATUS_PAUSE:

                            dataset_ppprun.status=spconst.PPPRUN_STATUS_WAITING
                            dataset_ppprun.last_mod_date=sptime.now()

                            spppprdao.update_ppprun(dataset_ppprun,conn)


        conn.commit()
    finally:
        spdb.disconnect(conn) # if an exception occurs, we do the rollback here
Example #31
#!/usr/bin/env python

import splog
splog.info('Foo Bar')
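
Apart from this one-argument call, every snippet in this listing passes splog.info a log code plus a message; a minimal stand-in with that two-argument signature, wrapping the standard logging module (purely illustrative, not the real splog module), could be:

import logging

logging.basicConfig(level=logging.INFO)
_logger = logging.getLogger('sp')

def info(code, message):
    # each entry carries a project-wide code (e.g. 'SPDAEMON-001') plus a free-form message
    _logger.info('%s %s', code, message)
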
Example #32
def start():
    splog.info('SPEVENTT-001', "Event thread starting ...")
    event_thread.start()
Example #33
def job_done(job): # note: this method name does not imply that the job completed successfully. It just means the job ended (with or without error).

    # debug
    splog.info("SPPOSTPR-200","DEBUG (%s)"%str(job))

    splog.info("SPPOSTPR-210","Job done (job_class=%s)"%(job.job_class,))

    job.end_date=sptime.now()

    ppprun=spppprdao.get_ppprun(job.ppprun_id)
    assert ppprun.status==spconst.PPPRUN_STATUS_RUNNING

    if job.error==False:
        splog.info("SPPOSTPR-101","Job completed successfully (ppprun_id=%s)"%str(job.ppprun_id))

        job.status=spconst.JOB_STATUS_DONE

        # compute new state
        pipeline=sppipeline.get_pipeline(ppprun.pipeline)
        pipeline.set_current_state(ppprun.state)
        pipeline.next(job.transition_return_code) # as the job is done, we move to the next state (the next state always exists at this point, else what's the point of the job?)

        # set DAO to new state
        ppprun.state=pipeline.get_current_state().source

        # retrieve the next transition
        next_transition=pipeline.get_current_state().transition

        if next_transition is not None:
            ppprun.transition=next_transition.name
            ppprun.status=spconst.PPPRUN_STATUS_WAITING
        else:
            # if we are here, it means that pipeline is complete

            ppprun.transition=None
            ppprun.status=spconst.PPPRUN_STATUS_DONE

    elif job.error==True:
        splog.info("SPPOSTPR-102","Job failed (ppprun_id=%s)"%str(job.ppprun_id))
        job.status=spconst.JOB_STATUS_ERROR
        ppprun.status=spconst.PPPRUN_STATUS_ERROR
        ppprun.error_msg=job.error_msg

    ppprun.last_mod_date=sptime.now()

    # we also store all job attributes in DB in JSON fmt
    # (some job attributes contain detailed info about the run, so we keep them all as they may be useful for debugging)
    #
    job.runlog=json.dumps(job.__dict__)

    # compute duration
    job.duration=sptime.compute_duration(job.start_date,job.end_date)

    # Update DB
    try:
        conn=spdb.connect()
        spppprdao.update_ppprun(ppprun,conn)
        spjobrdao.add_jobrun(job,conn)

        if ppprun.status==spconst.PPPRUN_STATUS_DONE:
            if ppprun.pipeline in pipelinedep.trigger:
                dependent_pipeline,trigger_type=pipelinedep.trigger[ppprun.pipeline]
                trigger_pipeline(ppprun,dependent_pipeline,trigger_type,conn)

        conn.commit()
    finally:
        spdb.disconnect(conn) # if an exception occurs, we do the rollback here