Example #1
def complete(files):
    for f in files:

        # the if/else block below exists because this module can process different metadata types (File and Dataset).
        if f["type"]==sdconst.SA_TYPE_FILE:
            transfer=sdfiledao.get_file(f['file_functional_id'])

            if transfer is not None:
                f['status']=transfer.status

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'): # allow setting the priority via a selection parameter (i.e. the default priority can be overridden). Useful here, for example, when the user wants to change the priority (yes, a search-API request is needed in this case!).
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=transfer.priority
            else:
                f['status']=sdconst.TRANSFER_STATUS_NEW

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'): # allow setting the priority via a selection parameter (i.e. the default priority can be overridden). Useful here to set a special priority for new files.
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=sdconst.DEFAULT_PRIORITY

        elif f["type"]==sdconst.SA_TYPE_DATASET:
            dataset=sddatasetdao.get_dataset(dataset_functional_id=f['dataset_functional_id'])

            if dataset is not None:
                f['status']=dataset.status
            else:
                f['status']=sdconst.DATASET_STATUS_NEW
        else:
            raise SDException('SDCOMPLE-001','Incorrect type (%s)'%f["type"])

    return files
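
A minimal sketch of the File branch above, using made-up stand-ins for the sdconst constants and an in-memory dict in place of the sdfiledao lookup (none of these names are the real Synda API):

# Stand-in constants and data; the real sdconst values may differ.
SA_TYPE_FILE = 'File'
TRANSFER_STATUS_NEW = 'new'
DEFAULT_PRIORITY = 1000

known_transfers = {'f1': ('done', 50)}  # file_functional_id -> (status, priority)

def complete_sketch(files):
    for f in files:
        if f['type'] == SA_TYPE_FILE:
            transfer = known_transfers.get(f['file_functional_id'])
            if transfer is not None:
                # file already known locally: reuse its status and priority
                f['status'], f['priority'] = transfer
            else:
                # file never seen before: mark as new with the default priority
                f['status'] = TRANSFER_STATUS_NEW
                f['priority'] = DEFAULT_PRIORITY
    return files

print(complete_sketch([{'type': 'File', 'file_functional_id': 'f1'},
                       {'type': 'File', 'file_functional_id': 'f2'}]))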
Example #2
def get_dataset_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_path)s"%f # note that we don't add var folder here (we do ot only for the file local path)
    elif fmt=="tree":
        path="%(dataset_path)s"%f
    elif fmt=="custom":

        # note: 'sdreducecol' filter must be disabled when using this format

        custom_dataset_template=sdpostpipelineutils.get_attached_parameter(f,'local_path_drs_template')
        if custom_dataset_template is not None:
            path=custom_dataset_template%f
        else:
            raise SDException('SDLOCALP-014',"'local_path_drs_template' must be set when 'local_path_format' is set to 'custom'.")

    elif fmt=="homemade":

        # note: 'sdreducecol' filter must be disabled when using this format

        path=local_path_homemade_transform(f)
    elif fmt=="notree":
        path=""
    else:
        raise SDException('SDLOCALP-010',"'local_path_format' is incorrect (%s)"%fmt)

    return path
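
The path strings are produced with Python %-formatting against the file's metadata dict; a quick self-contained illustration (the sample value is made up):

f = {'dataset_path': 'CMIP5/output1/MOHC/HadGEM2-ES'}
print("%(dataset_path)s" % f)  # CMIP5/output1/MOHC/HadGEM2-ES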
Example #3
def transform_local_path_product(files):
    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_product_format',sdconst.DEFAULT_LOCAL_PATH_PRODUCT_FORMAT)

        if fmt=="normal":
            pass
        elif fmt=="merge":

            path=f["dataset_local_path"]

            # TODO: move to sdproduct

            for product in ['/output1/','/output2/']:
                path=path.replace(product,"/output/")

            f["dataset_local_path"]=path
        elif fmt=="remove":

            path=f["dataset_local_path"]

            # TODO: move to sdproduct

            for product in ['/output/','/output1/','/output2/']:
                path=path.replace(product,"/")

            f["dataset_local_path"]=path
        else:
            raise SDException('SDLOCALP-002',"'local_path_product_format' is incorrect (%s)"%fmt)

    return files
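
What the 'merge' and 'remove' rewrites do to a sample dataset path (the value is made up):

path = 'CMIP5/output1/MOHC/HadGEM2-ES/day/atmos'

merged = path
for product in ['/output1/', '/output2/']:
    merged = merged.replace(product, '/output/')
print(merged)   # CMIP5/output/MOHC/HadGEM2-ES/day/atmos

removed = path
for product in ['/output/', '/output1/', '/output2/']:
    removed = removed.replace(product, '/')
print(removed)  # CMIP5/MOHC/HadGEM2-ES/day/atmos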
Example #4
def nearest_flag_set_on_all_files(files):
    """This func checks that all files have the 'nearest' flag (as sdnearestpost processing type is 'interfile', we need ALL files to be flagged)."""

    for f in files:
        nearest=sdpostpipelineutils.get_attached_parameter(f,'nearest','false')
        if nearest=='false':
            return False
    return True
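
Since a single 'false' wins, the loop above is just an all() over the flag values; a compact equivalent, with a plain dict lookup standing in for get_attached_parameter:

def nearest_flag_set_on_all_files_sketch(files):
    # every file must carry nearest == 'true'; one 'false' (or absent flag) wins
    return all(f.get('nearest', 'false') != 'false' for f in files)

print(nearest_flag_set_on_all_files_sketch([{'nearest': 'true'}, {}]))  # False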
Example #5
def get_file_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_local_path)s/%(variable)s/%(filename)s" % f
    elif fmt=="tree":
        path="%(dataset_local_path)s/%(filename)s"%f
    elif fmt=="notree":
        path="%(filename)s"%f
    else:
        raise SDException('SDLOCALP-001',"'local_path_format' is incorrect (%s)"%fmt)

    return path
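
The three layouts side by side, on made-up sample metadata:

f = {'dataset_local_path': 'CMIP5/output1/MOHC/HadGEM2-ES',
     'variable': 'tas',
     'filename': 'tas_day_HadGEM2-ES_rcp85_r1i1p1_20060101-20301230.nc'}

print("%(dataset_local_path)s/%(variable)s/%(filename)s" % f)  # treevar
print("%(dataset_local_path)s/%(filename)s" % f)               # tree
print("%(filename)s" % f)                                       # notree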
Example #6
def get_dataset_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_path)s"%f # note that we don't add var folder here (we do ot only for the file local path)
    elif fmt=="tree":
        path="%(dataset_path)s"%f
    elif fmt=="notree":
        path=""
    else:
        raise SDException('SDLOCALP-010',"'local_path_format' is incorrect (%s)"%fmt)

    return path
Example #7
def nearest_flag_set_on_all_files(metadata):
    """This func checks that all files have the 'nearest' flag (as sdnearestpost processing type is 'interfile', we need ALL files to be flagged)."""
    status=True

    # create a light list with only the needed columns, so as not to overload system memory
    light_metadata=sdlmattrfilter.run(metadata,['attached_parameters']) # we keep 'attached_parameters' because it contains the 'nearest' flag we are interested in

    for f in light_metadata.get_files(): # load complete list in memory
        nearest=sdpostpipelineutils.get_attached_parameter(f,'nearest','false')
        if nearest=='false': # one false wins
            status=False
            break

    return status
Example #8
def transform_local_path_product(files,key):

    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_product_format',sdconst.DEFAULT_LOCAL_PATH_PRODUCT_FORMAT)

        if fmt=="normal":
            pass
        elif fmt=="merge":
            f[key]=sdproduct.replace_output12_product_with_output_product(f[key])
        elif fmt=="remove":
            f[key]=sdproduct.remove_product(f[key])
        else:
            raise sdexception.SDException('SDLOCALP-002',"'local_path_product_format' is incorrect (%s)"%fmt)

    return files
Example #9
def run(files):
    new_files=[]
    for file in files:
        allowed_time_ranges=sdpostpipelineutils.get_attached_parameter(file,'timeslice')

        if allowed_time_ranges is None:
            new_files.append(file)
        else:
            file_timeslice=get_timeslice_from_filename(file['title'])

            if timeslice_in_allowed_time_range(file_timeslice,allowed_time_ranges):
                new_files.append(file)
            else:
                pass

    return new_files
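
get_timeslice_from_filename and timeslice_in_allowed_time_range are defined elsewhere in the module; hypothetical implementations, only to make the filter self-contained (the real ones may differ):

import re

def get_timeslice_from_filename(title):
    # e.g. 'tas_day_X_20060101-20301230.nc' -> ('20060101', '20301230')
    m = re.search(r'(\d{8})-(\d{8})', title)
    return m.groups() if m else None

def timeslice_in_allowed_time_range(timeslice, allowed):
    # sketch format for 'allowed': list of (start, end) strings, inclusive;
    # same-length digit strings compare correctly as plain strings
    if timeslice is None:
        return False
    start, end = timeslice
    return any(lo <= start and end <= hi for lo, hi in allowed)

print(timeslice_in_allowed_time_range(
    get_timeslice_from_filename('tas_day_X_20060101-20301230.nc'),
    [('20000101', '20401231')]))  # True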
Example #10
def nearest_flag_set_on_all_files(metadata):
    """This func checks that all files have the 'nearest' flag (as sdnearestpost processing type is 'interfile', we need ALL files to be flagged)."""
    status = True

    # create a light list with only the needed columns, so as not to overload system memory
    light_metadata = sdlmattrfilter.run(
        metadata, ['attached_parameters']
    )  # we keep 'attached_parameters' because it contains the 'nearest' flag we are interested in

    for f in light_metadata.get_files():  # load complete list in memory
        nearest = sdpostpipelineutils.get_attached_parameter(
            f, 'nearest', 'false')
        if nearest == 'false':  # one false wins
            status = False
            break

    return status
Example #11
def run(files):
    for file in files:
        protocol = sdpostpipelineutils.get_attached_parameter(
            file, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004",
                              "Incorrect protocol (%s)" % protocol)

        if protocol == sdconst.TRANSFER_PROTOCOL_GLOBUS:
            if 'url_globus' in file:
                file['url'] = file['url_globus']
            elif 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.warning('SYNPROTO-005',
                              'Fallback to http as globus url is missing')
                file['url'] = file['url_http']

        elif protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
            if 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.debug(
                    'SYNPROTO-002',
                    'Fallback to http as gridftp url is missing (%s)' %
                    file["title"])
                file['url'] = file['url_http']

        elif protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
            if 'url_http' in file:
                file['url'] = file['url_http']
            elif 'url_gridftp' in file:
                sdlog.warning('SYNPROTO-001',
                              'Fallback to gridftp as http url is missing')
                file['url'] = file['url_gridftp']

        else:
            raise SDException("SYNPROTO-003",
                              "Incorrect protocol (%s)" % protocol)

        sdtools.remove_dict_items(
            file, ['url_globus', 'url_gridftp', 'url_http', 'url_opendap'])

    return files
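
The per-protocol fallback order above, condensed into a lookup table; the lowercase protocol keys are made-up stand-ins for the sdconst constants:

FALLBACK_ORDER = {
    'globus':  ['url_globus', 'url_gridftp', 'url_http'],
    'gridftp': ['url_gridftp', 'url_http'],
    'http':    ['url_http', 'url_gridftp'],
}

def pick_url(file, protocol):
    # return the first available url for the requested protocol, or None
    for key in FALLBACK_ORDER[protocol]:
        if key in file:
            return file[key]
    return None

print(pick_url({'url_http': 'http://host/f.nc'}, 'gridftp'))  # http://host/f.nc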
Example #12
def run(files):
    new_files = []
    for file in files:
        allowed_time_ranges = sdpostpipelineutils.get_attached_parameter(
            file, 'timeslice')

        if allowed_time_ranges is None:
            new_files.append(file)
        else:
            file_timeslice = get_timeslice_from_filename(file['title'])

            if timeslice_in_allowed_time_range(file_timeslice,
                                               allowed_time_ranges):
                new_files.append(file)
            else:
                pass

    return new_files
Example #13
def run(files):
    new_files=[]

    for f in files:

        # retrieve status attributes
        status=f['status']                                                   # scalar
        status_filter=sdpostpipelineutils.get_attached_parameter(f,'status') # list

        if status_filter is None:
            new_files.append(f)
        else:
            assert isinstance(status_filter,list)
            if status in status_filter:
                new_files.append(f)
            else:
                pass

    return new_files
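
The status filter in action, with a plain list standing in for the 'status' attached parameter:

files = [{'status': 'done'}, {'status': 'new'}]
status_filter = ['new']  # as if attached via the 'status' selection parameter
print([f for f in files if f['status'] in status_filter])  # [{'status': 'new'}]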
Example #14
def transform_local_path_project(files):
    def extract_project(path):
        m=re.search('^([^/]+)/',path)
        if m is not None:
            project=m.group(1)

            return project
        else:
            raise SDException('SDLOCALP-006','Incorrect value (path=%s)'%path)

    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_project_format',sdconst.DEFAULT_LOCAL_PATH_PROJECT_FORMAT)
        if fmt=="uc":

            path=f["dataset_local_path"]
            project=extract_project(path)

            f["dataset_local_path"]=re.sub('^'+project,project.upper(),path)
    return files
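
The 'uc' rewrite upper-cases only the leading project segment; the same two-step regex on a made-up path:

import re

path = 'cmip5/output1/MOHC/HadGEM2-ES'
m = re.search('^([^/]+)/', path)
project = m.group(1)                                 # 'cmip5'
print(re.sub('^' + project, project.upper(), path))  # CMIP5/output1/MOHC/HadGEM2-ES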
Example #15
def run(files):
    for file in files:
        protocol=sdpostpipelineutils.get_attached_parameter(file,'protocol',sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004","Incorrect protocol (%s)"%protocol)

        if 'url_gridftp' in file and 'url_http' in file:

            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                file['url']=file['url_gridftp']
            elif protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                file['url']=file['url_http']
            else:
                raise SDException("SYNPROTO-003","Incorrect protocol (%s)"%protocol)

        elif 'url_gridftp' in file:
            # only gridftp

            if protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                sdlog.warning('SYNPROTO-001','Fallback to gridftp as http url is missing')

            file['url']=file['url_gridftp']

        elif 'url_http' in file:
            # only http
    
            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                sdlog.debug('SYNPROTO-002','Fallback to http as gridftp url is missing (%s)'%file["title"])

            file['url']=file['url_http']

        else:
            # no url available to download the file
            # (should not be here as sdremoverow takes care of those cases)

            assert False


        sdtools.remove_dict_items(file,['url_gridftp', 'url_http', 'url_opendap'])

    return files
Example #16
def run(files):
    new_files = []

    for f in files:

        # retrieve status attributes
        status = f['status']  # scalar
        status_filter = sdpostpipelineutils.get_attached_parameter(
            f, 'status')  # list

        if status_filter is None:
            new_files.append(f)
        else:
            assert isinstance(status_filter, list)
            if status in status_filter:
                new_files.append(f)
            else:
                pass

    return new_files
Example #17
def transform_local_path_product(files, key):

    for f in files:
        fmt = sdpostpipelineutils.get_attached_parameter(
            f, 'local_path_product_format',
            sdconst.DEFAULT_LOCAL_PATH_PRODUCT_FORMAT)

        if fmt == "normal":
            pass
        elif fmt == "merge":
            f[key] = sdproduct.replace_output12_product_with_output_product(
                f[key])
        elif fmt == "remove":
            f[key] = sdproduct.remove_product(f[key])
        else:
            raise sdexception.SDException(
                'SDLOCALP-002',
                "'local_path_product_format' is incorrect (%s)" % fmt)

    return files
Example #18
def build_file_local_path(f):
    fmt = sdpostpipelineutils.get_attached_parameter(
        f, 'local_path_format', sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt == "treevar":
        path = "%(dataset_local_path)s/%(variable)s/%(filename)s" % f
    elif fmt == "tree":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "custom":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "customvar":  # TAGJ34234JK24
        path = "%(dataset_local_path)s/%(variable)s/%(filename)s" % f
    elif fmt == "homemade":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "notree":
        path = "%(filename)s" % f
    else:
        raise SDException('SDLOCALP-001',
                          "'local_path_format' is incorrect (%s)" % fmt)

    return path
Example #19
def transform_local_path_project(files, key):
    def extract_project(path):
        m = re.search('^([^/]+)/', path)
        if m is not None:
            project = m.group(1)

            return project
        else:
            raise sdexception.SDException('SDLOCALP-006',
                                          'Incorrect value (path=%s)' % path)

    for f in files:
        fmt = sdpostpipelineutils.get_attached_parameter(
            f, 'local_path_project_format',
            sdconst.DEFAULT_LOCAL_PATH_PROJECT_FORMAT)
        if fmt == "uc":

            path = f[key]

            project = extract_project(path)

            f[key] = re.sub('^' + project, project.upper(), path)

    return files
Example #20
def build_dataset_local_path(f):
    fmt = sdpostpipelineutils.get_attached_parameter(
        f, 'local_path_format', sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt == "treevar":
        path = "%(dataset_path)s" % f  # note that we don't add var folder here (we do it only for the file local path)
    elif fmt == "tree":
        path = "%(dataset_path)s" % f
    elif fmt == "custom" or fmt == "customvar":  # TAGJ34234JK24

        # note: 'sdreducecol' filter must be disabled when using this format

        custom_dataset_template = sdpostpipelineutils.get_attached_parameter(
            f, 'local_path_drs_template')
        if custom_dataset_template is not None:

            keys = sdtools.extract_keys_from_template(custom_dataset_template)

            # check that only scalar facets are involved here
            # (i.e. we raise an exception if a facet is used in 'custom_dataset_template' and contains more than one value)
            #
            for key in keys:
                if key in f:
                    val = f[key]
                    if isinstance(val, list):
                        if len(val) == 0:
                            raise SDException(
                                'SDLOCALP-018',
                                "'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s,%s)"
                                % (key, f['dataset_functional_id'], val))
                        elif len(val) > 1:
                            raise SDException(
                                'SDLOCALP-016',
                                "Only scalar value can be used in 'local_path_drs_template' (%s,%s)"
                                % (f['dataset_functional_id'], val))
                else:
                    raise SDException(
                        'SDLOCALP-020',
                        "'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s)"
                        % (
                            key,
                            f['dataset_functional_id'],
                        ))

            # hack
            #
            # cast list to scalar if any
            #
            # this is needed when an attribute type is 'list' AND the attribute contains exactly one item.
            #
            # currently, it is used only for the following case:
            #     - we trigger a CDF event for a CORDEX dataset (i.e. a project with one variable per dataset)
            #     - at TAGJ43KJ234JK, we build a dataset local path in a DATASET pipeline context,
            #       which means that the variable attribute has the 'list' type (see sdpostxptransform for details)
            #     - so we solve this case here by casting the list to a scalar (this is OK because there is
            #       only one item in the dataset variable attribute with the CORDEX project, which is not true
            #       for every project)
            #
            values = {}
            for key in keys:
                values[key] = sdtools.scalar(f[key])

            path = custom_dataset_template % values
        else:
            raise SDException(
                'SDLOCALP-014',
                "'local_path_drs_template' must be set when 'local_path_format' is set to 'custom'."
            )

    elif fmt == "homemade":

        # note: 'sdreducecol' filter must be disabled when using this format

        path = local_path_homemade_transform(f)
    elif fmt == "notree":
        path = ""
    else:
        raise SDException('SDLOCALP-010',
                          "'local_path_format' is incorrect (%s)" % fmt)

    return path
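
Hypothetical stand-ins for the two sdtools helpers used above (extract_keys_from_template and scalar), showing how a custom DRS template is resolved; the template and metadata are made up:

import re

def extract_keys_from_template(template):
    # '%(institute)s/%(model)s' -> ['institute', 'model']
    return re.findall(r'%\((\w+)\)s', template)

def scalar(value):
    # cast a one-item list to its single item; pass scalars through
    return value[0] if isinstance(value, list) else value

template = '%(institute)s/%(model)s/%(variable)s'
f = {'institute': 'MOHC', 'model': 'HadGEM2-ES', 'variable': ['tas']}

values = {key: scalar(f[key]) for key in extract_keys_from_template(template)}
print(template % values)  # MOHC/HadGEM2-ES/tas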
Example #21
def build_dataset_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_path)s"%f # note that we don't add var folder here (we do it only for the file local path)
    elif fmt=="tree":
        path="%(dataset_path)s"%f
    elif fmt=="custom" or fmt=="customvar": # TAGJ34234JK24

        # note: 'sdreducecol' filter must be disabled when using this format

        custom_dataset_template=sdpostpipelineutils.get_attached_parameter(f,'local_path_drs_template')
        if custom_dataset_template is not None:

            keys=sdtools.extract_keys_from_template(custom_dataset_template)

            # check that only scalar facets are involved here
            # (i.e. we raise an exception if a facet is used in 'custom_dataset_template' and contains more than one value)
            # 
            for key in keys:
                if key in f:
                    val=f[key]
                    if isinstance(val,list):
                        if len(val)==0:
                            raise SDException('SDLOCALP-018',"'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s,%s)"%(key,f['dataset_functional_id'],val))
                        elif len(val)>1:
                            raise SDException('SDLOCALP-016',"Only scalar value can be used in 'local_path_drs_template' (%s,%s)"%(f['dataset_functional_id'],val))
                else:
                    raise SDException('SDLOCALP-020',"'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s)"%(key,f['dataset_functional_id'],))

            # hack
            #
            # cast list to scalar if any
            #
            # this is needed when an attribute type is 'list' AND the attribute contains exactly one item.
            #
            # currently, it is used only for the following case:
            #     - we trigger a CDF event for a CORDEX dataset (i.e. a project with one variable per dataset)
            #     - at TAGJ43KJ234JK, we build a dataset local path in a DATASET pipeline context,
            #       which means that the variable attribute has the 'list' type (see sdpostxptransform for details)
            #     - so we solve this case here by casting the list to a scalar (this is OK because there is
            #       only one item in the dataset variable attribute with the CORDEX project, which is not true
            #       for every project)
            #
            values={}
            for key in keys:
                values[key]=sdtools.scalar(f[key])

            path=custom_dataset_template%values
        else:
            raise SDException('SDLOCALP-014',"'local_path_drs_template' must be set when 'local_path_format' is set to 'custom'.")

    elif fmt=="homemade":

        # note: 'sdreducecol' filter must be disabled when using this format

        path=local_path_homemade_transform(f)
    elif fmt=="notree":
        path=""
    else:
        raise SDException('SDLOCALP-010',"'local_path_format' is incorrect (%s)"%fmt)

    return path