예제 #1
0
def pre_transfer_check_list(tr):
    """Run the pre-transfer checks for transfer 'tr'.

    Behaviour depends on the module-level 'lfae_mode' ("local file already
    exists" mode) setting: an existing local file is either kept, removed,
    or causes the transfer to be aborted.

    Return:
        Check list status

        True: Check list OK (the download may start)
        False: Check list NOK (the download must not start)
    """

    if lfae_mode == "keep":
        # usefull mode if
        #  - metadata needs to be regenerated without retransfering the data
        #  - synda files are mixed with files from other sources

        if os.path.isfile(tr.get_full_local_path()):
            # file already here, mark the file as done

            sdlog.info(
                "SYNDTASK-197",
                "Local file already exists: keep it (lfae_mode=keep,local_file=%s)"
                % tr.get_full_local_path())

            tr.status = sdconst.TRANSFER_STATUS_DONE
            tr.error_msg = "Local file already exists: keep it (lfae_mode=keep)"
            tr.end_date = sdtime.now()
            # note: it is important not to update a running status in this
            # case, else local file non-related with synda may be removed by
            # synda (because of cleanup_running_transfer() func). See mail
            # from Hans Ramthun at 20150331 for more details.
            sdfiledao.update_file(tr)

            return False
        else:
            # file not here, start the download

            return True
    elif lfae_mode == "replace":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info(
                "SYNDTASK-187",
                "Local file already exists: remove it (lfae_mode=replace,local_file=%s)"
                % tr.get_full_local_path())
            os.remove(tr.get_full_local_path())

        return True
    elif lfae_mode == "abort":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info(
                "SYNDTASK-188",
                "Local file already exists: transfer aborted (lfae_mode=abort,local_file=%s)"
                % tr.get_full_local_path())

            tr.status = sdconst.TRANSFER_STATUS_ERROR
            tr.error_msg = "Local file already exists: transfer aborted (lfae_mode=abort)"
            tr.end_date = sdtime.now()
            sdfiledao.update_file(tr)

            return False
        else:
            return True
    else:
        # Unknown lfae_mode: the code used to fall through and implicitly
        # return None. Keep the falsy (NOK) outcome, but make it explicit.
        return False
예제 #2
0
파일: sdtask.py 프로젝트: ncarenton/synda
    def start_transfer(tr):
        """Start (or short-circuit) the download of transfer 'tr'.

        The module-level 'lfae_mode' setting decides what happens when the
        local file already exists: keep it as-is, replace it, or abort the
        transfer.

        Note
            if no more transfer waiting, get_transfer() raises "NoTransferWaitingException" exception
        """
        def launch_download(transfer):
            # Persist the new state, then hand the actual download over to a
            # worker thread.
            sdfiledao.update_file(transfer)
            worker = WorkerThread(transfer, eot_queue, Download)
            worker.setDaemon(True)  # if main thread quits, we kill running threads (note though that forked child processes are NOT killed and continue running after that !)
            worker.start()

        # reset values from a previous try, if any
        tr.end_date = None
        tr.error_msg = None
        tr.status = sdconst.TRANSFER_STATUS_RUNNING
        tr.start_date = sdtime.now()

        if lfae_mode == "keep":
            # usefull mode if
            #  - metadata needs to be regenerated without retransfering the data
            #  - synda files are mixed with files from other sources

            if not os.path.isfile(tr.get_full_local_path()):
                # file not here, start the download
                launch_download(tr)
            else:
                # file already here, mark the file as done
                sdlog.info("SYNDTASK-197","Local file already exists: keep it (lfae_mode=keep,local_file=%s)"%tr.get_full_local_path())

                tr.status = sdconst.TRANSFER_STATUS_DONE
                tr.error_msg = "Local file already exists: keep it (lfae_mode=keep)"
                tr.end_date = sdtime.now()
                # note: it is important not to update a running status in this
                # case, else local file non-related with synda may be removed
                # by synda (because of cleanup_running_transfer() func). See
                # mail from Hans Ramthun at 20150331 for more details.
                sdfiledao.update_file(tr)
        elif lfae_mode == "replace":
            if os.path.isfile(tr.get_full_local_path()):
                sdlog.info("SYNDTASK-187","Local file already exists: remove it (lfae_mode=replace,local_file=%s)"%tr.get_full_local_path())
                os.remove(tr.get_full_local_path())

            launch_download(tr)
        elif lfae_mode == "abort":
            if not os.path.isfile(tr.get_full_local_path()):
                launch_download(tr)
            else:
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.error_msg = "Local file already exists: transfer aborted (lfae_mode=abort)"
                tr.end_date = sdtime.now()
                sdfiledao.update_file(tr)
예제 #3
0
파일: sdtask.py 프로젝트: Prodiguer/synda
def pre_transfer_check_list(tr):
    """
    Return:
        Check list status

        True: Check list OK
        False: Check list NOK
    """

    if lfae_mode == "keep":
        # usefull mode if
        #  - metadata needs to be regenerated without retransfering the data
        #  - synda files are mixed with files from other sources

        if not os.path.isfile(tr.get_full_local_path()):
            # file not here, start the download
            return True

        # file already here, mark the file as done
        sdlog.info("SYNDTASK-197","Local file already exists: keep it (lfae_mode=keep,local_file=%s)"%tr.get_full_local_path())

        tr.status = sdconst.TRANSFER_STATUS_DONE
        tr.error_msg = "Local file already exists: keep it (lfae_mode=keep)"
        tr.end_date = sdtime.now()
        # note: it is important not to update a running status in this case,
        # else local file non-related with synda may be removed by synda
        # (because of cleanup_running_transfer() func). See mail from Hans
        # Ramthun at 20150331 for more details.
        sdfiledao.update_file(tr)

        return False

    elif lfae_mode == "replace":
        if os.path.isfile(tr.get_full_local_path()):
            sdlog.info("SYNDTASK-187","Local file already exists: remove it (lfae_mode=replace,local_file=%s)"%tr.get_full_local_path())
            os.remove(tr.get_full_local_path())

        return True

    elif lfae_mode == "abort":
        if not os.path.isfile(tr.get_full_local_path()):
            return True

        sdlog.info("SYNDTASK-188","Local file already exists: transfer aborted (lfae_mode=abort,local_file=%s)"%tr.get_full_local_path())

        tr.status = sdconst.TRANSFER_STATUS_ERROR
        tr.priority -= 1
        tr.error_msg = "Local file already exists: transfer aborted (lfae_mode=abort)"
        tr.end_date = sdtime.now()
        sdfiledao.update_file(tr)

        return False
예제 #4
0
def keep_recent_datasets(datasets):
    """Return the datasets modified during the last 24 hours.

    This func is a hack.
    """

    # Note that we use last_mod_date instead of crea_date, so to also
    # try to retrieve timestamp for previously inserted dataset
    # (i.e. dataset which have been modified during this discovery
    # (i.e. new files have been added to the dataset), but which have
    # been created in a previous discovery).
    #
    # We only try to retrieve timestamp for recent datasets (-24H).
    # This is to prevent retrieving timestamp for datasets not related
    # to the current discovery, because for example, there are 20 000
    # datasets without timestamp on VESG4, and we don't want to trigger
    # 20 000 search-API request each time we install a new file !

    # Hoist the reference time out of the loop so every dataset is compared
    # against the same cutoff (the original called sdtime.now() once per item).
    now = sdtime.now()

    # A dataset not modified in the last 24 hours is not related to the
    # current discovery, so it is filtered out (the original expressed this
    # with an empty 'pass' branch).
    return [d for d in datasets
            if sdtime.compute_time_delta(d.last_mod_date, now) <= (24 * 3600)]
예제 #5
0
def update_latest_flag(d, force_latest=False):
    """Recompute and persist the 'latest' flag of dataset 'd'.

    Args:
        force_latest: If 'true', force 'latest' to 'true' no matter what the compute_latest_flag() method say)

    Notes
     - warning: this method update the dataset in database (and in some cases, also all other different versions of this datasets)
     - warning: this method modifies 'd' object
    """

    assert not d.latest  # this func must NOT be called if the dataset is already 'latest'

    dataset_versions = sddatasetquery.get_dataset_versions(d, True)  # retrieves all dataset versions

    # set the *new* value for the 'latest' flag
    d.latest = True if force_latest else compute_latest_flag(dataset_versions, d)

    if d.latest:
        # if we are here, it means latest switched from False to True

        d.latest_date = sdtime.now()  # "latest_date" is set when dataset "latest" flag switches from False to True
        switch_off_latest_flag_for_all_other_versions(d.version, dataset_versions)  # MOD_A
    # else: 'latest' stays False, nothing more to do

    sddatasetdao.update_dataset(d, False, sddb.conn)  # MOD_B
    sddb.conn.commit()  # commit all datasets modifications together (MOD_A (if any) and MOD_B)
예제 #6
0
def latest_dataset_complete_event(project, model, dataset_pattern, commit=True):
    """Enqueue the 'latest dataset complete' event.

    This event means the latest dataset has been completed (beware: no
    'latest switch' event here: was latest before and still is).
    """
    sdlog.log("SYDEVENT-045", "'latest_dataset_complete_event' triggered (%s)" % dataset_pattern, event_triggered_log_level)

    # CMIP5 use output12 special event
    if project == 'CMIP5':
        return

    # CORDEX and CMIP6 use only variable level event
    if project in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:
        return

    evt = Event(name=sdconst.EVENT_LATEST_DATASET_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = ''
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #7
0
def file_complete_event(tr):
    """
    Note
        when a variable is complete, we know for sure that all variable's files are fetched,
        because a variable is atomic, i.e. it is not possible to retrieve a subset of variable's files
        (this is true because you can't select a subset of the files of a
        variable with the search-API (search-API temporal n spatial filters are
        at variable level without the possibility to ask a subset of the variable's files))
        but a dataset can be marked as complete even if it contains only a subset of variables included in this dataset
        (but still all variables that have been discovered for this dataset must be complete)
    """
    sdlog.log("SYDEVENT-001", "'file_complete_event' triggered (%s)" % tr.file_functional_id, event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_FILE_COMPLETE, tr.project):
        evt = Event(name=sdconst.EVENT_FILE_COMPLETE)
        evt.project = tr.project
        evt.model = tr.model
        evt.dataset_pattern = tr.dataset.local_path
        evt.variable = tr.variable
        evt.filename_pattern = tr.filename
        evt.crea_date = sdtime.now()
        evt.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(evt, commit=True)

    # update dataset (all except 'latest' flag)
    tr.dataset.status = sddatasetflag.compute_dataset_status(tr.dataset)
    tr.dataset.last_done_transfer_date = tr.end_date
    sddatasetdao.update_dataset(tr.dataset)

    # cascade: once every file of the variable is fetched, trigger the
    # 'variable complete' event
    if sdvariable.is_variable_complete(tr.dataset.dataset_id, tr.variable):
        variable_complete_event(tr.project, tr.model, tr.dataset, tr.variable)
예제 #8
0
파일: sdevent.py 프로젝트: Prodiguer/synda
def file_complete_event(tr):
    """
    Note
        when a variable is complete, we know for sure that all variable's files are fetched,
        because a variable is atomic, i.e. it is not possible to retrieve a subset of variable's files
        (this is true because you can't select a subset of the files of a
        variable with the search-API (search-API temporal n spatial filters are
        at variable level without the possibility to ask a subset of the variable's files))
        but a dataset can be marked as complete even if it contains only a subset of variables included in this dataset
        (but still all variables that have been discovered for this dataset must be complete)
    """
    sdlog.log("SYDEVENT-001", "'file_complete_event' triggered (%s)" % tr.file_functional_id, event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_FILE_COMPLETE, tr.project):
        file_event = Event(name=sdconst.EVENT_FILE_COMPLETE)
        file_event.project = tr.project
        file_event.model = tr.model
        file_event.dataset_pattern = tr.dataset.local_path
        file_event.variable = tr.variable
        file_event.filename_pattern = tr.filename
        file_event.crea_date = sdtime.now()
        file_event.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(file_event, commit=True)

    # update dataset (all except 'latest' flag)
    tr.dataset.status = sddatasetflag.compute_dataset_status(tr.dataset)
    tr.dataset.last_done_transfer_date = tr.end_date
    sddatasetdao.update_dataset(tr.dataset)

    # cascade: trigger 'variable complete' event once the whole variable is fetched
    if sdvariable.is_variable_complete(tr.dataset.dataset_id, tr.variable):
        variable_complete_event(tr.project, tr.model, tr.dataset, tr.variable)
예제 #9
0
def prepare_transfer(tr):
    """Reset per-attempt fields of 'tr' and mark it as running."""

    # wipe leftovers from a previous attempt, if any
    tr.error_msg = None
    tr.end_date = None

    tr.status = sdconst.TRANSFER_STATUS_RUNNING
    tr.start_date = sdtime.now()
예제 #10
0
def keep_recent_datasets(datasets):
    """Return the datasets modified during the last 24 hours.

    This func is a hack.
    """

    # Note that we use last_mod_date instead of crea_date, so to also
    # try to retrieve timestamp for previously inserted dataset
    # (i.e. dataset which have been modified during this discovery
    # (i.e. new files have been added to the dataset), but which have
    # been created in a previous discovery).
    #
    # We only try to retrieve timestamp for recent datasets (-24H).
    # This is to prevent retrieving timestamp for datasets not related
    # to the current discovery, because for example, there are 20 000
    # datasets without timestamp on VESG4, and we don't want to trigger
    # 20 000 search-API request each time we install a new file !

    # Hoist the reference time out of the loop so every dataset is compared
    # against the same cutoff (the original called sdtime.now() once per item).
    now = sdtime.now()

    # A dataset not modified in the last 24 hours is not related to the
    # current discovery, so it is filtered out (the original expressed this
    # with an empty 'pass' branch).
    return [d for d in datasets
            if sdtime.compute_time_delta(d.last_mod_date, now) <= (24 * 3600)]
예제 #11
0
def update_latest_flag(d, force_latest=False):
    """Recompute and persist the 'latest' flag of dataset 'd'.

    Args:
        force_latest: If 'true', force 'latest' to 'true' no matter what the compute_latest_flag() method say)

    Notes
     - warning: this method update the dataset in database (and in some cases, also all other different versions of this datasets)
     - warning: this method modifies 'd' object
    """

    assert not d.latest  # this func must NOT be called if the dataset is already 'latest'

    # retrieves all dataset versions
    dataset_versions = sddatasetquery.get_dataset_versions(d, True)

    # set the *new* value for the 'latest' flag
    d.latest = True if force_latest else compute_latest_flag(dataset_versions, d)

    if d.latest:
        # if we are here, it means latest switched from False to True

        # "latest_date" is set when dataset "latest" flag switches from False to True
        d.latest_date = sdtime.now()
        switch_off_latest_flag_for_all_other_versions(d.version, dataset_versions)  # MOD_A
    # else: 'latest' stays False, nothing more to do

    sddatasetdao.update_dataset(d, False, sddb.conn)  # MOD_B

    # commit all datasets modifications together (MOD_A (if any) and MOD_B)
    sddb.conn.commit()
예제 #12
0
def submit(order_name, project, model, dataset, variable='', filename='', commit=True):
    """Create an event named after 'order_name' and enqueue it."""
    # TODO: replace single quote with None and move 'None2SingleQuote' processing inside Event object (and add comment about why we use single quote instead of None in event table !!!)

    event_name = order_name

    dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset)
    filename_pattern = filename

    sdlog.info("SDPPORDE-001", "'%s' triggered (%s,%s)" % (event_name, dataset_pattern, variable))

    evt = Event(name=event_name)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = variable
    evt.filename_pattern = filename_pattern
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY

    sdeventdao.add_event(evt, commit=commit)
예제 #13
0
def prepare_transfer(tr):
    """Reset per-attempt fields of 'tr' and mark it as running."""

    # wipe leftovers from a previous attempt, if any
    tr.error_msg = None
    tr.end_date = None

    tr.status = sdconst.TRANSFER_STATUS_RUNNING
    tr.start_date = sdtime.now()
예제 #14
0
def add_history_line(action, selection_filename=None, insertion_group_id=None, crea_date=None, selection_file_checksum=None, conn=sddb.conn):
    """Insert one row into the 'history' table and commit.

    Args:
        action: history action label stored in the row
        selection_filename: selection file the action relates to (may be None)
        insertion_group_id: insertion group identifier (may be None)
        crea_date: row creation date; defaults to the current time
        selection_file_checksum: checksum of the selection file (may be None)
        conn: database connection (defaults to the module-level sddb.conn)
    """

    crea_date = sdtime.now() if crea_date is None else crea_date

    c = conn.cursor()
    try:
        c.execute(
            "insert into history (action, selection_filename, crea_date, insertion_group_id, selection_file_checksum) values (?,?,?,?,?)",
            (action, selection_filename, crea_date, insertion_group_id, selection_file_checksum))
    finally:
        # close the cursor even when the insert raises (the original leaked
        # it on error)
        c.close()
    conn.commit()
예제 #15
0
    def run(cls, tr):
        """Execute the transfer script for 'tr' and record timing metrics."""
        cls.start_transfer_script(tr)
        tr.end_date = sdtime.now()

        # compute metrics (only when the transfer finished successfully)
        if tr.status == sdconst.TRANSFER_STATUS_DONE:
            tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
            tr.rate = sdtools.compute_rate(tr.size, tr.duration)
예제 #16
0
def add_file(f):
    """Register file 'f' as a new waiting transfer (no commit here)."""
    sdlog.info(
        "SDENQUEU-003",
        "Create transfer (local_path=%s,url=%s)" % (f.get_full_local_path(), f.url))

    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()

    sdfiledao.add_file(f, commit=False)
예제 #17
0
파일: sddao.py 프로젝트: cedadev/synda
def store_dataset_export_event(d, conn=sddb.conn):
    """Record in the 'export' table that dataset 'd' was exported now.

    Args:
        d: dataset object; only d.dataset_id is read
        conn: database connection (defaults to the module-level sddb.conn)
    """
    c = conn.cursor()
    try:
        c.execute("insert into export (dataset_id,export_date) values (?,?)", (
            d.dataset_id,
            sdtime.now(),
        ))
        conn.commit()
    finally:
        # release the cursor even when the insert/commit raises (the original
        # leaked it on error)
        c.close()
예제 #18
0
def add_file(f):
    """Register file 'f' as a new waiting transfer (caller commits)."""
    sdlog.info("SDENQUEU-003", "Create transfer (local_path=%s,url=%s)" % (f.get_full_local_path(), f.url))

    f.dataset_id = add_dataset(f)
    f.status = sdconst.TRANSFER_STATUS_WAITING
    f.crea_date = sdtime.now()
    sdfiledao.add_file(f, commit=False)
예제 #19
0
def variable_complete_event(project, model, dataset, variable, commit=True):
    """Handle completion of 'variable' in 'dataset' and cascade follow-up events."""
    sdlog.log("SYDEVENT-002", "'variable_complete_event' triggered (%s,%s)" % (dataset.dataset_functional_id, variable), event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_VARIABLE_COMPLETE, project):
        evt = Event(name=sdconst.EVENT_VARIABLE_COMPLETE)
        evt.project = project
        evt.model = model
        evt.dataset_pattern = dataset.local_path
        evt.variable = variable
        evt.filename_pattern = ''
        evt.crea_date = sdtime.now()
        evt.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(evt, commit=commit)

    # cascade 1 (trigger dataset event)
    if dataset.status == sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project, model, dataset)  # trigger 'dataset complete' event

    # cascade 2 (trigger variable output12 event) — CMIP5 only
    if project != 'CMIP5':
        return

    if '/output/' in dataset.path:
        return

    (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)

    if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
        d1 = sddatasetdao.get_dataset(path=ds_path_output1)
        d2 = sddatasetdao.get_dataset(path=ds_path_output2)

        if sdvariable.is_variable_complete(d1.dataset_id, variable) and sdvariable.is_variable_complete(d2.dataset_id, variable):
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross dataset event)
    else:
        # we also trigger the 'variable_complete_output12_event' event if the variable is over one product only (because if only one product, then output12 event is also true)
        dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
        variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross dataset event)
예제 #20
0
파일: sdevent.py 프로젝트: ncarenton/synda
def variable_complete_output12_event(project, model, dataset_pattern, variable, commit=True):
    """Enqueue the cross-dataset 'output12 variable complete' event."""
    sdlog.info("SYDEVENT-003", "'variable_complete_output12_event' triggered (%s,%s)" % (dataset_pattern, variable))

    evt = Event(name=sdconst.EVENT_OUTPUT12_VARIABLE_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = variable
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #21
0
파일: sdevent.py 프로젝트: ncarenton/synda
def latest_dataset_complete_output12_event(project, model, dataset_pattern, commit=True):
    """Enqueue the 'output12 latest dataset complete' event.

    This event means one latest dataset has been completed (i.e. was latest
    before and still is).
    """
    sdlog.info("SYDEVENT-006", "'latest_dataset_complete_output12_event' triggered (%s)" % dataset_pattern)

    evt = Event(name=sdconst.EVENT_OUTPUT12_LATEST_DATASET_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = ''
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #22
0
def variable_complete_output12_event(project, model, dataset_pattern, variable, commit=True):
    """Enqueue the cross-dataset 'output12 variable complete' event."""
    sdlog.log("SYDEVENT-003", "'variable_complete_output12_event' triggered (%s,%s)" % (dataset_pattern, variable), event_triggered_log_level)

    evt = Event(name=sdconst.EVENT_OUTPUT12_VARIABLE_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = variable
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #23
0
def latest_output12_dataset_complete_event(project, model, dataset_pattern, commit=True):
    """Enqueue the 'output12 latest dataset complete' event.

    This event means the latest output12 dataset has been completed (beware:
    no 'latest switch' event here: was latest before and still is).
    """
    sdlog.log("SYDEVENT-006", "'latest_output12_dataset_complete_event' triggered (%s)" % dataset_pattern, event_triggered_log_level)

    evt = Event(name=sdconst.EVENT_OUTPUT12_LATEST_DATASET_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = ''
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #24
0
def submit(order_name, project, model, dataset, variable='', filename='', commit=True):
    """Create an event named after 'order_name' and enqueue it."""
    # TODO: replace single quote with None and move 'None2SingleQuote' processing inside Event object (and add comment about why we use single quote instead of None in event table !!!)

    event_name = order_name

    dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset)
    filename_pattern = filename

    sdlog.info("SDPPORDE-001", "'%s' triggered (%s,%s)" % (event_name, dataset_pattern, variable))

    evt = Event(name=event_name)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = variable
    evt.filename_pattern = filename_pattern
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY

    sdeventdao.add_event(evt, commit=commit)
예제 #25
0
파일: sdevent.py 프로젝트: Prodiguer/synda
def variable_complete_event(project, model, dataset, variable, commit=True):
    """Handle completion of 'variable' in 'dataset' and cascade follow-up events."""
    sdlog.log("SYDEVENT-002", "'variable_complete_event' triggered (%s,%s)" % (dataset.dataset_functional_id, variable), event_triggered_log_level)

    if sdconfig.is_event_enabled(sdconst.EVENT_VARIABLE_COMPLETE, project):
        evt = Event(name=sdconst.EVENT_VARIABLE_COMPLETE)
        evt.project = project
        evt.model = model
        evt.dataset_pattern = dataset.local_path
        evt.variable = variable
        evt.filename_pattern = ''
        evt.crea_date = sdtime.now()
        evt.priority = sdconst.DEFAULT_PRIORITY
        sdeventdao.add_event(evt, commit=commit)

    # cascade 1 (trigger dataset event)
    if dataset.status == sdconst.DATASET_STATUS_COMPLETE:
        dataset_complete_event(project, model, dataset)  # trigger 'dataset complete' event

    # cascade 2 (trigger variable output12 event) — CMIP5 only
    if project != 'CMIP5':
        return

    assert '/output/' not in dataset.path

    (ds_path_output1, ds_path_output2) = sdproduct.get_output12_dataset_paths(dataset.path)

    if sddatasetdao.exists_dataset(path=ds_path_output1) and sddatasetdao.exists_dataset(path=ds_path_output2):
        d1 = sddatasetdao.get_dataset(path=ds_path_output1)
        d2 = sddatasetdao.get_dataset(path=ds_path_output2)

        if sdvariable.is_variable_complete(d1.dataset_id, variable) and sdvariable.is_variable_complete(d2.dataset_id, variable):
            dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
            variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross dataset event)
    else:
        # we also trigger the 'variable_complete_output12_event' event if the variable is over one product only (because if only one product, then output12 event is also true)
        dataset_pattern = sdproduct.replace_output12_product_with_wildcard(dataset.local_path)
        variable_complete_output12_event(project, model, dataset_pattern, variable)  # trigger event (cross dataset event)
예제 #26
0
파일: sdevent.py 프로젝트: Prodiguer/synda
def latest_dataset_complete_event(project, model, dataset_pattern, commit=True):
    """Enqueue the 'latest dataset complete' event.

    This event means the latest dataset has been completed (beware: no
    'latest switch' event here: was latest before and still is).
    """
    sdlog.log("SYDEVENT-045", "'latest_dataset_complete_event' triggered (%s)" % dataset_pattern, event_triggered_log_level)

    # CMIP5 use output12 special event
    if project == 'CMIP5':
        return

    # CORDEX and CMIP6 use only variable level event
    if project in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:
        return

    evt = Event(name=sdconst.EVENT_LATEST_DATASET_COMPLETE)
    evt.project = project
    evt.model = model
    evt.dataset_pattern = dataset_pattern
    evt.variable = ''
    evt.filename_pattern = ''
    evt.crea_date = sdtime.now()
    evt.priority = sdconst.DEFAULT_PRIORITY
    sdeventdao.add_event(evt, commit=commit)
예제 #27
0
    def start_transfer_script(cls, tr):
        """Download 'tr' and update its status, error message and metrics.

        The file is fetched with sdget.download(). On success, the local size
        is compared with the remote size (mismatch is only logged), and when
        a remote checksum is available the local checksum is verified. What
        happens on a checksum mismatch depends on the module-level
        'incorrect_checksum_action' setting ("remove" or "keep"; any other
        value raises sdexception.FatalException).
        """

        sdlog.info("JFPDMDEF-001", "Will download url=%s" % (tr.url,))
        if sdconfig.fake_download:
            # test mode: pretend the download succeeded
            tr.status = sdconst.TRANSFER_STATUS_DONE
            tr.error_msg = ""
            tr.sdget_error_msg = ""
            return

        # main
        (tr.sdget_status, killed, tr.sdget_error_msg) = sdget.download(tr.url,
                                                                       tr.get_full_local_path(),
                                                                       debug=False,
                                                                       http_client=sdconst.HTTP_CLIENT_WGET,
                                                                       timeout=sdconst.ASYNC_DOWNLOAD_HTTP_TIMEOUT,
                                                                       verbosity=0,
                                                                       buffered=True,
                                                                       hpss=hpss)

        # check
        assert tr.size is not None

        # compute metrics
        tr.end_date = sdtime.now()
        tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
        tr.rate = sdtools.compute_rate(tr.size, tr.duration)

        # post-processing
        if tr.sdget_status == 0:

            if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
                sdlog.error("SDDMDEFA-002","size don't match (remote_size=%i,local_size=%i,local_path=%s)"%(int(tr.size),os.path.getsize(tr.get_full_local_path()),tr.get_full_local_path()))

            # retrieve remote checksum
            remote_checksum = tr.checksum

            if remote_checksum is not None:
                # remote checksum exists

                # compute local checksum
                checksum_type = tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5  # fallback to 'md5' (arbitrary)
                local_checksum = sdutils.compute_checksum(tr.get_full_local_path(), checksum_type)

                # compare local and remote checksum
                if remote_checksum == local_checksum:
                    # checksum is ok

                    tr.status = sdconst.TRANSFER_STATUS_DONE
                    tr.error_msg = ""
                else:
                    # checksum is not ok

                    if incorrect_checksum_action == "remove":
                        tr.status = sdconst.TRANSFER_STATUS_ERROR
                        tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                        # remove file from local repository
                        sdlog.error("SDDMDEFA-155","checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"%(local_checksum,remote_checksum,tr.get_full_local_path()))
                        try:
                            os.remove(tr.get_full_local_path())
                        except Exception:
                            # fixed: was 'except Exception,e' (Python-2-only
                            # syntax); the bound exception was never used
                            sdlog.error("SDDMDEFA-158","error occurs while removing local file (%s)"%tr.get_full_local_path())

                    elif incorrect_checksum_action == "keep":
                        sdlog.info("SDDMDEFA-157","local checksum doesn't match remote checksum (%s)"%tr.get_full_local_path())

                        tr.status = sdconst.TRANSFER_STATUS_DONE
                        tr.error_msg = ""
                    else:
                        raise sdexception.FatalException("SDDMDEFA-507","incorrect value (%s)"%incorrect_checksum_action)
예제 #28
0
 def run(cls, tr):
     """Run the transfer script for 'tr' and record when it finished."""
     cls.start_transfer_script(tr)
     tr.end_date = sdtime.now()
예제 #29
0
def add_dataset(f):
    """
    Get or create the dataset row that file *f* belongs to.

    Returns:
        dataset_id
    """
    existing = sddatasetdao.get_dataset(
        dataset_functional_id=f.dataset_functional_id)

    if existing is None:
        # First file seen for this dataset: create a fresh row.
        sdlog.info("SDENQUEU-002",
                   "create dataset (dataset_path=%s)" % (f.dataset_path))

        new_ds = Dataset()

        new_ds.local_path = f.dataset_local_path
        new_ds.path = f.dataset_path
        new_ds.path_without_version = f.dataset_path_without_version
        new_ds.dataset_functional_id = f.dataset_functional_id
        new_ds.template = f.dataset_template
        new_ds.version = f.dataset_version
        new_ds.project = f.project
        new_ds.status = sdconst.DATASET_STATUS_EMPTY
        new_ds.latest = False
        new_ds.crea_date = sdtime.now()
        new_ds.last_mod_date = sdtime.now()

        # non-mandatory attributes
        new_ds.timestamp = getattr(f, 'dataset_timestamp', None)
        new_ds.model = getattr(f, 'model', None)

        return sddatasetdao.add_dataset(new_ds, commit=False)

    # Dataset already exists.
    #
    # Check dataset local path format: once a dataset has been created using
    # one local_path format, it cannot be changed anymore without removing
    # the whole dataset / restarting the dataset from scratch.
    if existing.local_path != f.dataset_local_path:
        raise SDException(
            "SDENQUEU-008",
            "Incorrect local path format (existing_format=%s,new_format=%s)"
            % (existing.local_path, f.dataset_local_path))

    # Compute the new dataset status. Adding a file only ever downgrades a
    # "complete" dataset back to "in-progress"; "empty" and "in-progress"
    # datasets keep their status. Because a dataset can only downgrade (or
    # stay the same) here, the "latest" flag is left untouched: if it was
    # true it stays true, if it was false it stays false.
    if existing.status == sdconst.DATASET_STATUS_COMPLETE:
        existing.status = sdconst.DATASET_STATUS_IN_PROGRESS  # a dataset may be "in-progress" and also "latest"

    # "last_mod_date" is only modified here (i.e. it is not modified when the
    # dataset's file statuses change): it tracks when files were last added
    # to the dataset by this script.
    existing.last_mod_date = sdtime.now()

    sddatasetdao.update_dataset(existing, commit=False)

    return existing.dataset_id
예제 #30
0
def transfers_end():
    """Poll Globus for the status of every tracked transfer task.

    For each task in the module-level ``globus_tasks`` mapping, query the
    Globus transfer API and update each associated file transfer (``tr``):

    - SUCCEEDED: log a size mismatch if any, verify the remote checksum when
      one exists (behaviour on mismatch is driven by
      ``incorrect_checksum_action``), then mark the transfer done or in error.
    - FAILED: mark the transfer in error, lower its priority and remove any
      partial local file.

    Raises:
        FatalException: if ``incorrect_checksum_action`` holds an
            unsupported value.
    """
    _, _, access_token = api_client.goauth.get_access_token(
        username=globus_username, password=globus_password)
    api = api_client.TransferAPIClient(username=globus_username,
                                       goauth=access_token)

    for task_id in globus_tasks:

        code, reason, data = api.task(task_id, fields="status")
        status = data['status']

        sdlog.debug(
            "SDDMGLOB-016",
            "Checking the status of Globus transfer tasks, id: %s, status: %s"
            % (task_id, status))
        for item in globus_tasks[task_id]['items']:
            tr = item['tr']
            if status == "SUCCEEDED":

                assert tr.size is not None

                # size mismatch is logged but not fatal (the checksum check
                # below is authoritative)
                if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
                    sdlog.error(
                        "SDDMGLOB-002",
                        "size don't match (remote_size=%i,local_size=%i,local_path=%s)"
                        % (int(tr.size),
                           os.path.getsize(tr.get_full_local_path()),
                           tr.get_full_local_path()))

                remote_checksum = tr.checksum  # retrieve remote checksum

                if remote_checksum is not None:
                    # remote checksum exists

                    # compute the local checksum lazily, only when there is a
                    # remote value to compare against (hashing a large file
                    # is expensive and was previously done unconditionally)
                    checksum_type = tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5
                    local_checksum = sdutils.compute_checksum(
                        tr.get_full_local_path(), checksum_type)

                    # compare local and remote checksum
                    if remote_checksum == local_checksum:
                        # checksum is ok
                        tr.status = sdconst.TRANSFER_STATUS_DONE
                    else:
                        # checksum is not ok: behaviour is configuration-driven

                        if incorrect_checksum_action == "remove":
                            tr.status = sdconst.TRANSFER_STATUS_ERROR
                            tr.priority -= 1
                            tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                            # remove file from local repository
                            sdlog.error(
                                "SDDMGLOB-155",
                                "checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"
                                % (local_checksum, remote_checksum,
                                   tr.get_full_local_path()))
                            try:
                                os.remove(tr.get_full_local_path())
                            except Exception:  # removal failure is logged only; binding was unused
                                sdlog.error(
                                    "SDDMGLOB-158",
                                    "error occurs while removing local file (%s)"
                                    % tr.get_full_local_path())

                        elif incorrect_checksum_action == "keep":
                            sdlog.info(
                                "SDDMGLOB-157",
                                "local checksum doesn't match remote checksum (%s)"
                                % tr.get_full_local_path())

                            tr.status = sdconst.TRANSFER_STATUS_DONE

                        else:
                            raise FatalException(
                                "SDDMGLOB-507", "incorrect value (%s)" %
                                incorrect_checksum_action)
                else:
                    # remote checksum is missing
                    # NOTE: we DON'T store the local checksum ('file' table contains only the REMOTE checksum)

                    tr.status = sdconst.TRANSFER_STATUS_DONE

                if tr.status == sdconst.TRANSFER_STATUS_DONE:
                    tr.end_date = sdtime.now(
                    )  # WARNING: this is not the real end of transfer date but the date when we ask the globus scheduler if the transfer is done.
                    tr.error_msg = ""
                    sdlog.info("SDDMGLOB-101", "Transfer done (%s)" % str(tr))

            elif status == "FAILED":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.priority -= 1
                tr.error_msg = "Error occurs during download."

                sdlog.info("SDDMGLOB-101", "Transfer failed (%s)" % str(tr))

                # Remove local file if exists
                if os.path.isfile(tr.get_full_local_path()):
                    try:
                        os.remove(tr.get_full_local_path())
                    except Exception as e:  # 'as' form works on Python 2.6+ and 3.x
                        sdlog.error(
                            "SDDMGLOB-528",
                            "Error occurs during file suppression (%s,%s)" %
                            (tr.get_full_local_path(), str(e)))
예제 #31
0
def transfers_end():
    """Poll Globus for the status of every tracked transfer task.

    For each task in the module-level ``globus_tasks`` mapping, query the
    Globus transfer API and update each associated file transfer (``tr``):
    SUCCEEDED tasks are checksum-verified (behaviour on mismatch follows
    ``incorrect_checksum_action``); FAILED tasks are marked in error and any
    partial local file is removed.

    Raises:
        FatalException: if ``incorrect_checksum_action`` holds an
            unsupported value.
    """
    _, _, access_token = api_client.goauth.get_access_token(username=globus_username, password=globus_password)
    api = api_client.TransferAPIClient(username=globus_username, goauth=access_token)

    for task_id in globus_tasks:

        code, reason, data = api.task(task_id, fields="status")
        status = data['status']

        sdlog.debug("SDDMGLOB-016", "Checking the status of Globus transfer tasks, id: %s, status: %s" % (task_id, status))
        for item in globus_tasks[task_id]['items']:
            tr = item['tr']
            if status == "SUCCEEDED":

                assert tr.size is not None

                # size mismatch is logged but not fatal (checksum below is authoritative)
                if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
                    sdlog.error("SDDMGLOB-002","size don't match (remote_size=%i,local_size=%i,local_path=%s)"%(int(tr.size),os.path.getsize(tr.get_full_local_path()),tr.get_full_local_path()))

                remote_checksum=tr.checksum # retrieve remote checksum

                if remote_checksum is not None:
                    # remote checksum exists

                    # compute the local checksum lazily, only when there is a
                    # remote value to compare against (hashing a large file is
                    # expensive and was previously done unconditionally)
                    checksum_type=tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5 # fallback to md5, consistent with the other download modules (was a hard-coded 'md5' literal)
                    local_checksum=sdutils.compute_checksum(tr.get_full_local_path(),checksum_type)

                    # compare local and remote checksum
                    if remote_checksum==local_checksum:
                        # checksum is ok
                        tr.status = sdconst.TRANSFER_STATUS_DONE
                    else:
                        # checksum is not ok: behaviour is configuration-driven

                        if incorrect_checksum_action=="remove":
                            tr.status=sdconst.TRANSFER_STATUS_ERROR
                            tr.error_msg="File corruption detected: local checksum doesn't match remote checksum"

                            # remove file from local repository
                            sdlog.error("SDDMGLOB-155","checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"%(local_checksum,remote_checksum,tr.get_full_local_path()))
                            try:
                                os.remove(tr.get_full_local_path())
                            except Exception:  # removal failure is logged only; binding was unused
                                sdlog.error("SDDMGLOB-158","error occurs while removing local file (%s)"%tr.get_full_local_path())

                        elif incorrect_checksum_action=="keep":
                            sdlog.info("SDDMGLOB-157","local checksum doesn't match remote checksum (%s)"%tr.get_full_local_path())

                            tr.status=sdconst.TRANSFER_STATUS_DONE

                        else:
                            raise FatalException("SDDMGLOB-507","incorrect value (%s)"%incorrect_checksum_action)
                else:
                    # remote checksum is missing
                    # NOTE: we DON'T store the local checksum ('file' table contains only the REMOTE checksum)

                    tr.status = sdconst.TRANSFER_STATUS_DONE

                if tr.status == sdconst.TRANSFER_STATUS_DONE:
                    tr.end_date=sdtime.now() # WARNING: this is not the real end of transfer date but the date when we ask the globus scheduler if the transfer is done.
                    tr.error_msg=""
                    sdlog.info("SDDMGLOB-101", "Transfer done (%s)" % str(tr))

            elif status == "FAILED":
                tr.status = sdconst.TRANSFER_STATUS_ERROR
                tr.error_msg = "Error occurs during download."

                sdlog.info("SDDMGLOB-101", "Transfer failed (%s)" % str(tr))

                # Remove local file if exists
                if os.path.isfile(tr.get_full_local_path()):
                    try:
                        os.remove(tr.get_full_local_path())
                    except Exception as e:  # 'as' form works on Python 2.6+ and 3.x
                        sdlog.error("SDDMGLOB-528","Error occurs during file suppression (%s,%s)"%(tr.get_full_local_path(),str(e)))
예제 #32
0
def store_dataset_export_event(d,conn=sddb.conn):
    """Record in the 'export' table that dataset *d* was exported now.

    Args:
        d: dataset object (only ``d.dataset_id`` is read)
        conn: database connection (defaults to the shared application
            connection ``sddb.conn``)
    """
    c=conn.cursor()
    try:
        c.execute("insert into export (dataset_id,export_date) values (?,?)",(d.dataset_id,sdtime.now(),))
        conn.commit()
    finally:
        # close the cursor even if the insert fails (the original leaked it
        # on error)
        c.close()
예제 #33
0
def add_dataset(f):
    """
    Get or create the dataset row that file *f* belongs to.

    Args:
        f: file object; its dataset_* attributes describe the parent dataset.

    Returns:
        dataset_id
    """
    d=sddatasetdao.get_dataset(dataset_functional_id=f.dataset_functional_id)
    if d is not None:

        # check dataset local path format
        #
        # (once a dataset has been created using one local_path format, it
        # cannot be changed anymore without removing the all dataset /
        # restarting the dataset from scratch).
        #
        if d.local_path!=f.dataset_local_path:
            raise SDException("SDENQUEU-008","Incorrect local path format (existing_format=%s,new_format=%s)"%(d.local_path,f.dataset_local_path))

        # compute new dataset status
        if d.status==sdconst.DATASET_STATUS_IN_PROGRESS:
            d.status=sdconst.DATASET_STATUS_IN_PROGRESS # no-op: an in-progress dataset stays in-progress

        elif d.status==sdconst.DATASET_STATUS_EMPTY:
            d.status=sdconst.DATASET_STATUS_EMPTY # no-op: an empty dataset stays empty

        elif d.status==sdconst.DATASET_STATUS_COMPLETE:
            d.status=sdconst.DATASET_STATUS_IN_PROGRESS # this means that a dataset may be "in-progress" and also "latest"


        # Note related to the "latest" dataset column
        #
        # Adding new files to a datasets may change the status, but don't
        # change dataset "latest" flag.  This is because a dataset can only
        # downgrade here ("complete" => "in-progress"), or stay the same. And
        # when a dataset downgrade, "latest" flag, if true, stay as is, and if
        # false, stay as is also.

        # "last_mod_date" is only modified here (i.e. it is not modified when
        # dataset's files status change). in other words, it changes only when
        # adding new files to it using this script.
        #
        d.last_mod_date=sdtime.now()


        sddatasetdao.update_dataset(d,commit=False)

        return d.dataset_id

    else:
        # first file seen for this dataset: create a fresh row
        sdlog.info("SDENQUEU-002","create dataset (dataset_path=%s)"%(f.dataset_path))

        d=Dataset()

        d.local_path=f.dataset_local_path
        d.path=f.dataset_path
        d.path_without_version=f.dataset_path_without_version
        d.dataset_functional_id=f.dataset_functional_id
        d.template=f.dataset_template
        d.version=f.dataset_version
        d.project=f.project
        d.status=sdconst.DATASET_STATUS_EMPTY
        d.latest=False
        d.crea_date=sdtime.now()
        d.last_mod_date=sdtime.now()

        # non-mandatory attributes (not every file object carries them)
        d.timestamp=f.dataset_timestamp if hasattr(f,'dataset_timestamp') else None
        d.model=f.model if hasattr(f,'model') else None

        return sddatasetdao.add_dataset(d,commit=False)
예제 #34
0
    def start_transfer_script(cls, tr):
        """Download *tr* synchronously via the transfer script, then verify it.

        Side effects on *tr*: sets ``sdget_status``/``sdget_error_msg``,
        ``end_date``, ``duration``, ``rate``, and -- after checksum
        verification -- ``status``/``error_msg``.

        Raises:
            sdexception.FatalException: if ``incorrect_checksum_action``
                holds an unsupported value.
        """
        if sdconfig.fake_download:
            # test mode: pretend the download succeeded without touching the network
            tr.status = sdconst.TRANSFER_STATUS_DONE
            tr.error_msg = ""
            tr.sdget_error_msg = ""
            return

        # main (blocking download)
        (tr.sdget_status, killed, tr.sdget_error_msg) = sdget.download(
            tr.url,
            tr.get_full_local_path(),
            debug=False,
            http_client=sdconst.HTTP_CLIENT_WGET,
            timeout=sdconst.ASYNC_DOWNLOAD_HTTP_TIMEOUT,
            verbosity=0,
            buffered=True,
            hpss=hpss)

        # check
        assert tr.size is not None

        # compute metrics
        tr.end_date = sdtime.now()
        tr.duration = sdtime.compute_duration(tr.start_date, tr.end_date)
        tr.rate = sdtools.compute_rate(tr.size, tr.duration)

        # post-processing (only when the transfer script reported success)
        if tr.sdget_status == 0:

            # size mismatch is logged but not fatal (checksum below is authoritative)
            if int(tr.size) != os.path.getsize(tr.get_full_local_path()):
                sdlog.error(
                    "SDDMDEFA-002",
                    "size don't match (remote_size=%i,local_size=%i,local_path=%s)"
                    % (int(tr.size), os.path.getsize(
                        tr.get_full_local_path()), tr.get_full_local_path()))

            # retrieve remote checksum
            remote_checksum = tr.checksum

            if remote_checksum is not None:
                # remote checksum exists

                # compute local checksum
                checksum_type = tr.checksum_type if tr.checksum_type is not None else sdconst.CHECKSUM_TYPE_MD5  # fallback to 'md5' (arbitrary)
                local_checksum = sdutils.compute_checksum(
                    tr.get_full_local_path(), checksum_type)

                # compare local and remote checksum
                if remote_checksum == local_checksum:
                    # checksum is ok
                    tr.status = sdconst.TRANSFER_STATUS_DONE
                    tr.error_msg = ""
                else:
                    # checksum is not ok: behaviour is configuration-driven

                    if incorrect_checksum_action == "remove":
                        tr.status = sdconst.TRANSFER_STATUS_ERROR
                        tr.error_msg = "File corruption detected: local checksum doesn't match remote checksum"

                        # remove file from local repository
                        sdlog.error(
                            "SDDMDEFA-155",
                            "checksum don't match: remove local file (local_checksum=%s,remote_checksum=%s,local_path=%s)"
                            % (local_checksum, remote_checksum,
                               tr.get_full_local_path()))
                        try:
                            os.remove(tr.get_full_local_path())
                        except Exception:  # removal failure is logged only; binding was unused
                            sdlog.error(
                                "SDDMDEFA-158",
                                "error occurs while removing local file (%s)" %
                                tr.get_full_local_path())

                    elif incorrect_checksum_action == "keep":
                        sdlog.info(
                            "SDDMDEFA-157",
                            "local checksum doesn't match remote checksum (%s)"
                            % tr.get_full_local_path())

                        tr.status = sdconst.TRANSFER_STATUS_DONE
                        tr.error_msg = ""
                    else:
                        raise sdexception.FatalException(
                            "SDDMDEFA-507",
                            "incorrect value (%s)" % incorrect_checksum_action)
            # NOTE(review): no 'else' branch when the remote checksum is
            # missing -- tr.status is presumably set elsewhere; confirm.
예제 #35
0
def add_history_line(action,selection_filename=None,insertion_group_id=None,conn=sddb.conn):
    """Append one row to the 'history' table.

    Args:
        action: name of the action being recorded
        selection_filename: selection file the action applies to, if any
        insertion_group_id: id grouping related insertions, if any
        conn: database connection (defaults to the shared application
            connection ``sddb.conn``)
    """
    c = conn.cursor()
    try:
        c.execute("insert into history (action, selection_filename, crea_date, insertion_group_id) values (?,?,?,?)",(action, selection_filename, sdtime.now(), insertion_group_id))
        conn.commit()
    finally:
        # close the cursor even on failure (the original leaked it on error
        # and committed after closing; commit-then-close matches
        # store_dataset_export_event)
        c.close()