def complete(files):
    for f in files:

        # The if/else block below exists because this module can be used to
        # process different metadata types (File and Dataset).

        if f["type"]==sdconst.SA_TYPE_FILE:
            transfer=sdfiledao.get_file(f['file_functional_id'])
            if transfer is not None:
                f['status']=transfer.status

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'):
                    # Allow setting the priority using a selection parameter (i.e. the
                    # default priority can be overridden using a selection parameter).
                    # This is useful here, for example, when the user wants to change
                    # the priority (yes, a search-API request is needed in this case!).
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=transfer.priority
            else:
                f['status']=sdconst.TRANSFER_STATUS_NEW

                if sdpostpipelineutils.exists_attached_parameter(f,'priority'):
                    # Allow setting the priority using a selection parameter (i.e. the
                    # default priority can be overridden using a selection parameter).
                    # This is useful here to set a special priority for new files.
                    f['priority']=sdpostpipelineutils.get_attached_parameter(f,'priority')
                else:
                    f['priority']=sdconst.DEFAULT_PRIORITY

        elif f["type"]==sdconst.SA_TYPE_DATASET:
            dataset=sddatasetdao.get_dataset(dataset_functional_id=f['dataset_functional_id'])
            if dataset is not None:
                f['status']=dataset.status
            else:
                f['status']=sdconst.DATASET_STATUS_NEW
        else:
            raise SDException('SDCOMPLE-001','Incorrect type (%s)'%f["type"])

    return files
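# Illustration: a minimal sketch of the record shape complete() expects and of the
# status/priority resolution for a File entry. Ids, values and the stubbed DAO lookup
# below are hypothetical; only the control flow mirrors complete() above.
def _complete_sketch():
    f = {
        'type': 'File',                                   # i.e. sdconst.SA_TYPE_FILE
        'file_functional_id': 'cmip5.output1.xxx.tas.nc', # hypothetical id
        'attached_parameters': {'priority': 9000},        # optional selection parameters
    }

    transfer = None # stands in for sdfiledao.get_file(...) returning no row

    if transfer is not None:
        f['status'] = transfer.status
    else:
        f['status'] = 'new' # stands in for sdconst.TRANSFER_STATUS_NEW

    # an attached 'priority' selection parameter always overrides the default
    f['priority'] = f['attached_parameters'].get('priority', 1000) # 1000 stands in for sdconst.DEFAULT_PRIORITY

    return f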
def get_dataset_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_path)s"%f # note that we don't add the var folder here (we do it only for the file local path)
    elif fmt=="tree":
        path="%(dataset_path)s"%f
    elif fmt=="custom":
        # note: 'sdreducecol' filter must be disabled when using this format

        custom_dataset_template=sdpostpipelineutils.get_attached_parameter(f,'local_path_drs_template')
        if custom_dataset_template is not None:
            path=custom_dataset_template%f
        else:
            raise SDException('SDLOCALP-014',"'local_path_drs_template' must be set when 'local_path_format' is set to 'custom'.")
    elif fmt=="homemade":
        # note: 'sdreducecol' filter must be disabled when using this format
        path=local_path_homemade_transform(f)
    elif fmt=="notree":
        path=""
    else:
        raise SDException('SDLOCALP-010',"'local_path_format' is incorrect (%s)"%fmt)

    return path
def transform_local_path_product(files):
    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_product_format',sdconst.DEFAULT_LOCAL_PATH_PRODUCT_FORMAT)

        if fmt=="normal":
            pass
        elif fmt=="merge":
            path=f["dataset_local_path"]

            # TODO: move to sdproduct
            for product in ['/output1/','/output2/']:
                path=path.replace(product,"/output/")

            f["dataset_local_path"]=path
        elif fmt=="remove":
            path=f["dataset_local_path"]

            # TODO: move to sdproduct
            for product in ['/output/','/output1/','/output2/']:
                path=path.replace(product,"/")

            f["dataset_local_path"]=path
        else:
            raise SDException('SDLOCALP-002',"'local_path_product_format' is incorrect (%s)"%fmt)

    return files
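# Illustration: the 'merge' and 'remove' transforms above are plain substring
# replacements on the dataset path. Standalone demo (the sample path is hypothetical).
def _product_transform_demo():
    path='CMIP5/output1/CNRM-CERFACS/CNRM-CM5/historical' # hypothetical dataset path

    # 'merge': collapse the numbered product folders into a single 'output' folder
    merged=path
    for product in ['/output1/','/output2/']:
        merged=merged.replace(product,'/output/')
    assert merged=='CMIP5/output/CNRM-CERFACS/CNRM-CM5/historical'

    # 'remove': drop the product folder altogether
    removed=path
    for product in ['/output/','/output1/','/output2/']:
        removed=removed.replace(product,'/')
    assert removed=='CMIP5/CNRM-CERFACS/CNRM-CM5/historical'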
def nearest_flag_set_on_all_files(files):
    """Check that all files have the 'nearest' flag (as the sdnearestpost
    processing type is 'interfile', we need ALL files to be flagged)."""
    for f in files:
        nearest=sdpostpipelineutils.get_attached_parameter(f,'nearest','false')
        if nearest=='false':
            return False

    return True
def get_file_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_local_path)s/%(variable)s/%(filename)s"%f
    elif fmt=="tree":
        path="%(dataset_local_path)s/%(filename)s"%f
    elif fmt=="notree":
        path="%(filename)s"%f
    else:
        raise SDException('SDLOCALP-001',"'local_path_format' is incorrect (%s)"%fmt)

    return path
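# Illustration: the three layouts above differ only in the %-format template applied
# to the file's attributes. Standalone demo (sample values are hypothetical).
def _file_local_path_demo():
    f={
        'dataset_local_path': 'CMIP5/output/CNRM-CERFACS/CNRM-CM5/historical', # hypothetical
        'variable': 'tas',
        'filename': 'tas_Amon_CNRM-CM5_historical_r1i1p1_185001-200512.nc',
    }

    treevar="%(dataset_local_path)s/%(variable)s/%(filename)s"%f # <dataset>/<var>/<file>
    tree="%(dataset_local_path)s/%(filename)s"%f                 # <dataset>/<file>
    notree="%(filename)s"%f                                      # flat layout

    return treevar,tree,notree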
def get_dataset_local_path(f):
    fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_format',sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt=="treevar":
        path="%(dataset_path)s"%f # note that we don't add the var folder here (we do it only for the file local path)
    elif fmt=="tree":
        path="%(dataset_path)s"%f
    elif fmt=="notree":
        path=""
    else:
        raise SDException('SDLOCALP-010',"'local_path_format' is incorrect (%s)"%fmt)

    return path
def nearest_flag_set_on_all_files(metadata):
    """Check that all files have the 'nearest' flag (as the sdnearestpost
    processing type is 'interfile', we need ALL files to be flagged)."""
    status=True

    # Create a light list with only the needed columns, so as not to overload system
    # memory (we keep 'attached_parameters' because it contains the 'nearest' flag
    # we are interested in).
    light_metadata=sdlmattrfilter.run(metadata,['attached_parameters'])

    for f in light_metadata.get_files(): # load complete list in memory
        nearest=sdpostpipelineutils.get_attached_parameter(f,'nearest','false')
        if nearest=='false': # one false wins
            status=False
            break

    return status
def transform_local_path_product(files,key):
    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_product_format',sdconst.DEFAULT_LOCAL_PATH_PRODUCT_FORMAT)

        if fmt=="normal":
            pass
        elif fmt=="merge":
            f[key]=sdproduct.replace_output12_product_with_output_product(f[key])
        elif fmt=="remove":
            f[key]=sdproduct.remove_product(f[key])
        else:
            raise sdexception.SDException('SDLOCALP-002',"'local_path_product_format' is incorrect (%s)"%fmt)

    return files
def run(files):
    new_files=[]

    for file in files:
        allowed_time_ranges=sdpostpipelineutils.get_attached_parameter(file,'timeslice')

        if allowed_time_ranges is None:
            new_files.append(file)
        else:
            file_timeslice=get_timeslice_from_filename(file['title'])
            if timeslice_in_allowed_time_range(file_timeslice,allowed_time_ranges):
                new_files.append(file)

    return new_files
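# Illustration: get_timeslice_from_filename and timeslice_in_allowed_time_range are
# defined elsewhere; the sketch below is a HYPOTHETICAL reading of their contract,
# assuming filenames end with a 'YYYYMM-YYYYMM' period as is common for CMIP-style files.
import re

def _get_timeslice_from_filename_sketch(title):
    # e.g. 'tas_Amon_CNRM-CM5_historical_r1i1p1_185001-200512.nc' -> (185001, 200512)
    m=re.search(r'(\d{6})-(\d{6})\.nc$',title)
    if m is None:
        raise ValueError('no timeslice found in %s'%title)
    return (int(m.group(1)),int(m.group(2)))

def _timeslice_in_allowed_time_range_sketch(timeslice,allowed_time_ranges):
    # keep the file if its period falls entirely inside any allowed range
    start,end=timeslice
    return any(lo<=start and end<=hi for (lo,hi) in allowed_time_ranges)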
def run(files):
    for file in files:
        protocol = sdpostpipelineutils.get_attached_parameter(file, 'protocol', sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004", "Incorrect protocol (%s)" % protocol)

        if protocol == sdconst.TRANSFER_PROTOCOL_GLOBUS:
            if 'url_globus' in file:
                file['url'] = file['url_globus']
            elif 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.warning('SYNPROTO-005', 'Fallback to http as globus url is missing')
                file['url'] = file['url_http']
        elif protocol == sdconst.TRANSFER_PROTOCOL_GRIDFTP:
            if 'url_gridftp' in file:
                file['url'] = file['url_gridftp']
            elif 'url_http' in file:
                sdlog.debug('SYNPROTO-002', 'Fallback to http as gridftp url is missing (%s)' % file["title"])
                file['url'] = file['url_http']
        elif protocol == sdconst.TRANSFER_PROTOCOL_HTTP:
            if 'url_http' in file:
                file['url'] = file['url_http']
            elif 'url_gridftp' in file:
                sdlog.warning('SYNPROTO-001', 'Fallback to gridftp as http url is missing')
                file['url'] = file['url_gridftp']
        else:
            raise SDException("SYNPROTO-003", "Incorrect protocol (%s)" % protocol)

        sdtools.remove_dict_items(file, ['url_globus', 'url_gridftp', 'url_http', 'url_opendap'])

    return files
def run(files):
    new_files=[]

    for f in files:
        # retrieve status attributes
        status=f['status'] # scalar
        status_filter=sdpostpipelineutils.get_attached_parameter(f,'status') # list

        if status_filter is None:
            new_files.append(f)
        else:
            assert isinstance(status_filter,list)
            if status in status_filter:
                new_files.append(f)

    return new_files
def transform_local_path_project(files):
    def extract_project(path):
        m=re.search('^([^/]+)/',path)
        if m is not None:
            return m.group(1)
        else:
            raise SDException('SDLOCALP-006','Incorrect value (path=%s)'%path)

    for f in files:
        fmt=sdpostpipelineutils.get_attached_parameter(f,'local_path_project_format',sdconst.DEFAULT_LOCAL_PATH_PROJECT_FORMAT)

        if fmt=="uc":
            path=f["dataset_local_path"]
            project=extract_project(path)
            f["dataset_local_path"]=re.sub('^'+project,project.upper(),path)

    return files
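# Illustration: the 'uc' format above only upper-cases the leading project folder.
# Standalone demo (the sample path is hypothetical).
import re

def _project_uc_demo():
    path='cmip5/output/CNRM-CERFACS/CNRM-CM5/historical' # hypothetical local path
    project=re.search('^([^/]+)/',path).group(1)         # 'cmip5'
    return re.sub('^'+project,project.upper(),path)      # 'CMIP5/output/CNRM-CERFACS/...'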
def run(files):
    for file in files:
        protocol=sdpostpipelineutils.get_attached_parameter(file,'protocol',sdconst.TRANSFER_PROTOCOL_HTTP)

        if protocol not in sdconst.TRANSFER_PROTOCOLS:
            raise SDException("SYNPROTO-004","Incorrect protocol (%s)"%protocol)

        if 'url_gridftp' in file and 'url_http' in file:
            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                file['url']=file['url_gridftp']
            elif protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                file['url']=file['url_http']
            else:
                raise SDException("SYNPROTO-003","Incorrect protocol (%s)"%protocol)
        elif 'url_gridftp' in file: # only gridftp
            if protocol==sdconst.TRANSFER_PROTOCOL_HTTP:
                sdlog.warning('SYNPROTO-001','Fallback to gridftp as http url is missing')
            file['url']=file['url_gridftp']
        elif 'url_http' in file: # only http
            if protocol==sdconst.TRANSFER_PROTOCOL_GRIDFTP:
                sdlog.debug('SYNPROTO-002','Fallback to http as gridftp url is missing (%s)'%file["title"])
            file['url']=file['url_http']
        else:
            # No url available to download the file
            # (we should not get here, as sdremoverow takes care of those cases).
            assert False

        sdtools.remove_dict_items(file,['url_gridftp', 'url_http', 'url_opendap'])

    return files
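# Illustration: the selection rule above in miniature. When both urls are present the
# requested protocol wins; when only one is present it is used regardless (with a log
# entry if it differs from the request). Urls below are hypothetical.
def _protocol_selection_demo():
    file={
        'url_gridftp': 'gsiftp://esgf.example.org/data/tas.nc', # hypothetical
        'url_http': 'http://esgf.example.org/data/tas.nc',      # hypothetical
    }
    protocol='http' # requested protocol

    if 'url_gridftp' in file and 'url_http' in file:
        file['url']=file['url_gridftp'] if protocol=='gridftp' else file['url_http']
    elif 'url_gridftp' in file:
        file['url']=file['url_gridftp'] # forced fallback (warn if http was requested)
    elif 'url_http' in file:
        file['url']=file['url_http']    # forced fallback (log if gridftp was requested)

    return file['url'] # 'http://esgf.example.org/data/tas.nc'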
def build_file_local_path(f):
    fmt = sdpostpipelineutils.get_attached_parameter(f, 'local_path_format', sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt == "treevar":
        path = "%(dataset_local_path)s/%(variable)s/%(filename)s" % f
    elif fmt == "tree":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "custom":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "customvar": # TAGJ34234JK24
        path = "%(dataset_local_path)s/%(variable)s/%(filename)s" % f
    elif fmt == "homemade":
        path = "%(dataset_local_path)s/%(filename)s" % f
    elif fmt == "notree":
        path = "%(filename)s" % f
    else:
        raise SDException('SDLOCALP-001', "'local_path_format' is incorrect (%s)" % fmt)

    return path
def transform_local_path_project(files, key):
    def extract_project(path):
        m = re.search('^([^/]+)/', path)
        if m is not None:
            return m.group(1)
        else:
            raise sdexception.SDException('SDLOCALP-006', 'Incorrect value (path=%s)' % path)

    for f in files:
        fmt = sdpostpipelineutils.get_attached_parameter(f, 'local_path_project_format', sdconst.DEFAULT_LOCAL_PATH_PROJECT_FORMAT)

        if fmt == "uc":
            path = f[key]
            project = extract_project(path)
            f[key] = re.sub('^' + project, project.upper(), path)

    return files
def build_dataset_local_path(f):
    fmt = sdpostpipelineutils.get_attached_parameter(f, 'local_path_format', sdconst.DEFAULT_LOCAL_PATH_FORMAT)

    if fmt == "treevar":
        path = "%(dataset_path)s" % f # note that we don't add the var folder here (we do it only for the file local path)
    elif fmt == "tree":
        path = "%(dataset_path)s" % f
    elif fmt == "custom" or fmt == "customvar": # TAGJ34234JK24
        # note: 'sdreducecol' filter must be disabled when using this format

        custom_dataset_template = sdpostpipelineutils.get_attached_parameter(f, 'local_path_drs_template')
        if custom_dataset_template is not None:
            keys = sdtools.extract_keys_from_template(custom_dataset_template)

            # Check that only scalar facets are involved here (i.e. we raise an
            # exception if a facet used in 'custom_dataset_template' contains more
            # than one value).
            for key in keys:
                if key in f:
                    val = f[key]
                    if isinstance(val, list):
                        if len(val) == 0:
                            raise SDException('SDLOCALP-018', "'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s,%s)" % (key, f['dataset_functional_id'], val))
                        elif len(val) > 1:
                            raise SDException('SDLOCALP-016', "Only scalar values can be used in 'local_path_drs_template' (%s,%s)" % (f['dataset_functional_id'], val))
                else:
                    raise SDException('SDLOCALP-020', "'%s' key used in 'local_path_drs_template' but value missing in file's attributes (%s)" % (key, f['dataset_functional_id'],))

            # Hack: cast lists to scalars if any.
            #
            # This is needed when an attribute's type is 'list' AND the attribute
            # contains exactly one item.
            #
            # Currently, it is used only for the following case:
            # - we trigger a CDF event for a CORDEX dataset (i.e. a project with one
            #   variable per dataset)
            # - at TAGJ43KJ234JK, we build a dataset local path in a DATASET pipeline
            #   context, which means that the variable attribute has the 'list' type
            #   (see sdpostxptransform for details)
            # - so we solve this case here by casting the list to a scalar (this is OK
            #   because there is only one item in the dataset's variable attribute for
            #   the CORDEX project; this is not true for every project)
            values = {}
            for key in keys:
                values[key] = sdtools.scalar(f[key])

            path = custom_dataset_template % values
        else:
            raise SDException('SDLOCALP-014', "'local_path_drs_template' must be set when 'local_path_format' is set to 'custom'.")
    elif fmt == "homemade":
        # note: 'sdreducecol' filter must be disabled when using this format
        path = local_path_homemade_transform(f)
    elif fmt == "notree":
        path = ""
    else:
        raise SDException('SDLOCALP-010', "'local_path_format' is incorrect (%s)" % fmt)

    return path
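# Illustration: a sketch of how the 'custom'/'customvar' formats resolve, with a
# HYPOTHETICAL template and attributes (the real template comes from the
# 'local_path_drs_template' selection parameter); _scalar mimics the assumed
# behavior of sdtools.scalar (unwrap single-item lists).
def _custom_template_demo():
    custom_dataset_template = "%(project)s/%(model)s/%(experiment)s" # hypothetical template

    f = {
        'project': 'CORDEX',
        'model': ['CNRM-CM5'], # single-item list, as produced in a DATASET pipeline context
        'experiment': 'historical',
    }

    def _scalar(v):
        return v[0] if isinstance(v, list) else v

    keys = ['project', 'model', 'experiment'] # what extract_keys_from_template would return
    values = dict((k, _scalar(f[k])) for k in keys)

    return custom_dataset_template % values # 'CORDEX/CNRM-CM5/historical'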