def get_datasets(stream=None, parameter=None, post_pipeline_mode='dataset', dry_run=False):
    # TODO: maybe remove the 'parameter' argument everywhere, as it creates confusion in
    # get_selection_file_buffer because of default/forced parameters (i.e. len(parameter)
    # is non-zero even if no parameter args are set on the CLI!)

    if parameter is None:
        parameter = []

    # prevent using 'stream' and 'parameter' together
    assert (stream is None) or (len(parameter) < 1)

    assert post_pipeline_mode != 'file'

    if len(parameter) > 0:
        sddeferredbefore.add_forced_parameter(parameter, 'type', 'Dataset')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

    result = sdquicksearch.run(stream=stream, parameter=parameter, post_pipeline_mode=post_pipeline_mode, dry_run=dry_run)

    return result.get_files()
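# Usage sketch (assumed, not part of the original module): 'parameter' is taken to be a
# list of 'facet=value' strings passed through from the CLI; the facet values below are
# hypothetical examples.
#
#   datasets = get_datasets(parameter=['project=CMIP5', 'experiment=historical'])
#   for d in datasets:
#       print d
#
# Passing 'stream' instead of 'parameter' is also possible, but the two are mutually
# exclusive (see the assert above).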
def get_files(stream=None, parameter=[], post_pipeline_mode='file', dry_run=False):
    # TODO: maybe remove the 'parameter' argument everywhere, as it creates confusion in
    # get_selection_file_buffer because of default/forced parameters (i.e. len(parameter)
    # is non-zero even if no parameter args are set on the CLI!)

    # prevent using 'stream' and 'parameter' together
    assert (stream is None) or (len(parameter) < 1)

    if len(parameter) > 0:
        sddeferredbefore.add_forced_parameter(parameter, 'type', 'File')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', 'File')

    result = sdquicksearch.run(stream=stream, parameter=parameter, post_pipeline_mode=post_pipeline_mode, dry_run=dry_run)

    return result.files
def get_files(stream=None, parameter=[], dry_run=False):
    # prevent using 'stream' and 'parameter' together
    assert (stream is None) or (len(parameter) < 1)

    if len(parameter) > 0:
        sddeferredbefore.add_forced_parameter(parameter, 'type', 'File')
    elif stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', 'File')

    files = sdlsearch.run(stream=stream, parameter=parameter, dry_run=dry_run)

    return files
def run(stream=None, path=None, parameter=[], index_host=None, dry_run=False, type_=sdconst.SA_TYPE_DATASET):

    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', type_)
    else:
        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter, 'type', type_)

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, index_host=index_host, parallel=False, load_default=False, count=True)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # we don't support multiple queries, because of duplicates/intersections between queries
    # (i.e. which query's num_found attribute should be used?)
    if len(queries) > 1:
        raise SDException("SDQSEARC-100", "Too much query (multi-query is not allowed in this module, use sdquicksearch instead)")

    query = queries[0]

    if dry_run:
        request = sdtypes.Request(url=query['url'], pagination=False)

        print '%s' % request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query)  # return Response object
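# Usage sketch (assumed, not part of the original module): with dry_run=True the function
# only prints the search-API URL that would be queried and returns an empty Response; the
# facet string below is a hypothetical example.
#
#   run(parameter=['project=CMIP5'], dry_run=True)   # prints the query URL, no web-service call
#   response = run(parameter=['project=CMIP5'])      # performs the call and returns a Response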
def set_stream_type(args):
    import sddeferredbefore

    # Set the stream type (aka search-API 'type').
    #
    # Note that args.type_ is NOT the same thing as the stream type (aka
    # search-API type). args.type_ is only used locally to format the
    # listing presented to the user, while the stream type is the one sent
    # to the ESGF service to retrieve data. For example,
    # SA_TYPE_AGGREGATION is used by args.type_ to make some changes in
    # the output, but the search-API doesn't know about this type (i.e. for
    # most projects, you can't list anything by using this type). Also,
    # most modules of Synda behave the same way as the search-API: they
    # don't know about SA_TYPE_AGGREGATION. SA_TYPE_AGGREGATION is ONLY
    # used in Synda upstream code to make some local display
    # modifications.
    #
    # So what we do here is choose which search-API type we need
    # (dataset, file) for the listing type asked for by the user (i.e.
    # variable, dataset, file).
    #
    # But note that in most cases, the search-API 'type' will be overridden
    # later anyway, as it is forced in dedicated modules (e.g. in
    # sdrdataset, sdrfile, etc.).
    #
    # Also note that we 'force' (i.e. not 'default') the parameter here, to
    # prevent the user from setting it. We do this because if the user uses
    # the '-f' option with type=Dataset, the display type will not fit the
    # type of data fetched from the search-API.
    #
    if args.type_ in (sdconst.SA_TYPE_AGGREGATION, sdconst.SA_TYPE_DATASET):
        sddeferredbefore.add_forced_parameter(args.stream, 'type', sdconst.SA_TYPE_DATASET)
    elif args.type_ in (sdconst.SA_TYPE_FILE,):
        sddeferredbefore.add_forced_parameter(args.stream, 'type', sdconst.SA_TYPE_FILE)
    else:
        raise sdexception.SDException('SDASYNDA-001', 'Unknown type (%s)' % args.type_)
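# Resulting mapping, summarized (listing type chosen by the user -> search-API 'type'
# forced on the stream); this is a restatement of the branches above, not extra behavior:
#
#   SA_TYPE_AGGREGATION -> SA_TYPE_DATASET
#   SA_TYPE_DATASET     -> SA_TYPE_DATASET
#   SA_TYPE_FILE        -> SA_TYPE_FILE
#   anything else       -> SDException('SDASYNDA-001')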
    # (fragment from an action-dispatch block; the enclosing branches over args.action are not shown)
    #
    # So what we do here is choose which search-API type we need
    # (dataset, file) for the listing type asked for by the user (i.e.
    # variable, dataset, file).
    #
    # But note that in most cases, the search-API 'type' will be overridden
    # later anyway, as it is forced in dedicated modules (e.g. in
    # sdrdataset, sdrfile, etc.).
    #
    # Also note that we 'force' (i.e. not 'default') the parameter
    # here, to prevent the user from setting it. We do this because if the
    # user uses the '-f' option with type=Dataset, the display type will
    # not fit the type of data fetched from the search-API.
    #
    if args.type_ in (sdconst.SA_TYPE_AGGREGATION, sdconst.SA_TYPE_DATASET):
        sddeferredbefore.add_forced_parameter(stream, 'type', sdconst.SA_TYPE_DATASET)
    elif args.type_ in (sdconst.SA_TYPE_FILE,):
        sddeferredbefore.add_forced_parameter(stream, 'type', sdconst.SA_TYPE_FILE)
    else:
        from sdexception import SDException
        raise SDException('SDASYNDA-001', 'Unknown type (%s)' % args.type_)

    args.stream = stream  # hack: pass the 'stream' object downstream as a standalone argument (not inside args)

    import sdtsaction
    sdtsaction.actions[args.action](args)

elif args.action in ['remove', 'install', 'stat']:
    # these actions systematically trigger a full search (i.e. the 'limit' keyword cannot be used here)

    # check
def pexec(args):
    import sdsearch, sdpporder, sddb, syndautils, sdconst, sdpostpipelineutils, sdhistorydao, sddeferredbefore, sddomainutils

    if args.order_name == 'cdf':
        selection_filename = None

        # use the search-API operator to build the datasets list
        stream = syndautils.get_stream(subcommand=args.subcommand, selection_file=args.selection_file, no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

        dataset_found_count = 0
        order_variable_count = 0
        order_dataset_count = 0
        for facets_group in stream:  # we need to process each facets_group one by one because of TAG45345JK3J53K

            metadata = sdsearch.run(stream=[facets_group], post_pipeline_mode='dataset')  # TAGJ43KJ234JK

            dataset_found_count += metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None:  # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, do not keep the last value, which would be None)
                    dataset = metadata.get_one_file()
                    selection_filename = sdpostpipelineutils.get_attached_parameter__global([dataset], 'selection_filename')  # note that if no files are found at all for this selection (no matter the status), the filename will be blank

                for d in metadata.get_files():  # warning: loads the list in memory
                    if d['status'] == sdconst.DATASET_STATUS_COMPLETE:  # TAG45J4K45JK

                        # first, send the cdf variable order
                        # (note: the total number of variable events is given by: "total += #variable for each ds")
                        for v in d['variable']:
                            if v in facets_group['variable']:  # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count += 1

                                # hack
                                if sddomainutils.is_one_var_per_ds(d['project']):  # maybe move this test to the TAG45J4K45JK line, and replace 'EVENT_CDF_VARIABLE_O' by a dataset-level event (note however that the choice between passing 'EVENT_CDF_VARIABLE_O' as a variable or dataset event is arbitrary, both work; but passing it as a variable event is a bit strange as the variable then appears in both the dataset_pattern and variable columns)
                                    e_names = [sdconst.EVENT_CDF_INT_VARIABLE_O, sdconst.EVENT_CDF_COR_VARIABLE_O]
                                    # this case is a bit awkward as we have 'variable' in both the dataset_pattern and variable columns..
                                else:
                                    e_names = [sdconst.EVENT_CDF_INT_VARIABLE_N, sdconst.EVENT_CDF_COR_VARIABLE_N]

                                for e_name in e_names:
                                    sdpporder.submit(e_name, d['project'], d['model'], d['local_path'], variable=v, commit=False)

                        # second, send the cdf dataset order
                        if d['project'] in sdconst.PROJECT_WITH_ONE_VARIABLE_PER_DATASET:
                            # we do not trigger a 'dataset' level event in this case
                            pass
                        else:
                            order_dataset_count += 1

                            e_names = [sdconst.EVENT_CDF_INT_DATASET, sdconst.EVENT_CDF_COR_DATASET]
                            for e_name in e_names:
                                sdpporder.submit(e_name, d['project'], d['model'], d['local_path'], commit=False)

        sddb.conn.commit()

        if dataset_found_count > 0:
            if order_dataset_count == 0 and order_variable_count == 0:
                print_stderr("Data not ready (data must be already downloaded before performing pexec task): operation cancelled")
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC, selection_filename)

                print_stderr("Post-processing task successfully submitted (order_dataset_count=%d,order_variable_count=%d)" % (order_dataset_count, order_variable_count))
        else:
            print_stderr('Data not found')

    elif args.order_name == 'cds':
        selection_filename = None

        # use the search-API operator to build the datasets list
        stream = syndautils.get_stream(subcommand=args.subcommand, selection_file=args.selection_file, no_default=args.no_default)
        sddeferredbefore.add_forced_parameter(stream, 'type', 'Dataset')

        dataset_found_count = 0
        order_variable_count = 0
        for facets_group in stream:  # we need to process each facets_group one by one because of TAG45345JK3J53K

            metadata = sdsearch.run(stream=[facets_group], post_pipeline_mode='dataset')  # TAGJ43KJ234JK

            dataset_found_count += metadata.count()

            if metadata.count() > 0:

                # WART
                # (gets overwritten at each iteration, but not a big deal as it is always the same value)
                if selection_filename is None:  # keep the first value found (i.e. if the last facets_group is empty but previous ones are not, do not keep the last value, which would be None)
                    dataset = metadata.get_one_file()
                    selection_filename = sdpostpipelineutils.get_attached_parameter__global([dataset], 'selection_filename')  # note that if no files are found at all for this selection (no matter the status), the filename will be blank

                for d in metadata.get_files():  # warning: loads the list in memory
                    if d['status'] == sdconst.DATASET_STATUS_COMPLETE:  # TAG45J4K45JK

                        # send the cds variable order
                        # (note: the total number of variable events is given by: "total += #variable for each ds")
                        for v in d['variable']:
                            if v in facets_group['variable']:  # TAG45345JK3J53K (we check here that the variable has been asked for in the first place)
                                order_variable_count += 1
                                sdpporder.submit(sdconst.EVENT_CDS_VARIABLE, d['project'], d['model'], d['local_path'], variable=v, commit=False)

        sddb.conn.commit()

        if dataset_found_count > 0:
            if order_variable_count == 0:
                print_stderr("Data not ready (data must be already downloaded before performing pexec task): operation cancelled")
            else:
                sdhistorydao.add_history_line(sdconst.ACTION_PEXEC, selection_filename)

                print_stderr("Post-processing task successfully submitted (order_variable_count=%d)" % (order_variable_count))
        else:
            print_stderr('Data not found')
    else:
        print_stderr("Invalid order name ('%s')" % args.order_name)
        return 1

    return 0
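# Sketch of the submission pattern used in pexec above (hypothetical project, model, path
# and variable values): orders are queued with commit=False and then written in a single
# transaction via sddb.conn.commit() once every facets_group has been processed.
#
#   sdpporder.submit(sdconst.EVENT_CDS_VARIABLE, 'CMIP5', 'CNRM-CM5', '/path/to/dataset', variable='tas', commit=False)
#   sddb.conn.commit()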
def force_type(stream, type_):
    import sddeferredbefore

    # we 'force' (i.e. we do not just set as 'default') the parameter here,
    # to prevent the user from setting it
    sddeferredbefore.add_forced_parameter(stream, 'type', type_)
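# Usage sketch (assumed; 'stream' is the facet-group list built by syndautils.get_stream,
# as in pexec above):
#
#   force_type(stream, sdconst.SA_TYPE_DATASET)   # every query built from 'stream' will use type=Dataset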