def file_dump(args):
    import sdrfile, sddeferredafter, sdcolumnfilter, sdreducecol, sdconfig, sdprint

    sddeferredafter.add_default_parameter(args.stream, 'limit', sdconfig.get_default_limit('dump'))

    if args.raw_mode:
        post_pipeline_mode = None
        args.all = True # '--raw_mode' implies '--all'
    else:
        post_pipeline_mode = 'file'

    files = sdrfile.get_files(stream=args.stream, post_pipeline_mode=post_pipeline_mode, dry_run=args.dry_run)

    if not args.all:
        files = sdreducecol.run(files) # hide non-essential attributes (use '--all' to keep everything)

    if not args.dry_run:
        if len(files) > 0:
            files = sdcolumnfilter.run(files, args.column)
            sdprint.print_format(files, args.format)
        else:
            print_stderr("File not found")
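# Illustration (not Synda code): the 'deferred default' idea used above. Assuming a
# stream is a list of facet dicts and that sddeferredafter.add_default_parameter only
# sets 'limit' when the user did not supply one, a minimal sketch looks like this:
def _add_default_parameter_sketch(stream, name, value):
    for facets_group in stream:
        facets_group.setdefault(name, [value]) # leave user-supplied values untouched

_stream = [{'project': ['CMIP5']}, {'limit': ['5']}]
_add_default_parameter_sketch(_stream, 'limit', '20')
assert _stream[0]['limit'] == ['20'] # default filled in
assert _stream[1]['limit'] == ['5'] # user value preserved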
def dataset_dump(args):
    import sdrdataset, sddeferredafter, sdcolumnfilter, sdconfig, sdprint

    sddeferredafter.add_default_parameter(args.stream, 'limit', sdconfig.get_default_limit('dump'))

    post_pipeline_mode = None if args.raw_mode else 'dataset'

    files = sdrdataset.get_datasets(stream=args.stream, post_pipeline_mode=post_pipeline_mode, dry_run=args.dry_run)

    if not args.dry_run:
        if len(files) > 0:
            files = sdcolumnfilter.run(files, args.column)
            sdprint.print_format(files, args.format)
        else:
            print_stderr('Dataset not found')
        # sample aggregation file name
        # cmip5.output1.BCC.bcc-csm1-1-m.rcp26.day.atmos.day.r1i1p1.clt.20120910.aggregation
        result = ('.aggregation' in f.get("title"))
    elif f["type"] == sdconst.SA_TYPE_DATASET:
        # sample aggregation dataset name
        # obs4MIPs.LOA_IPSL.PARASOL.day.parasolRefl.1.aggregation.8
        result = ('.aggregation.' in f.get("instance_id"))
    else:
        raise SDException('SDRMAGGR-001', 'Incorrect type (%s)' % f["type"])

    return result

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    files = json.load(sys.stdin)
    files = run(files)
    sdprint.print_format(files, args.format, args.print_only_one_item)
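# Illustration (not Synda code): the aggregation test above, made self-contained with
# the two sample identifiers from the comments. The SA_TYPE_FILE / SA_TYPE_DATASET
# values are assumptions; the real ones live in sdconst.
SA_TYPE_FILE = 'File'
SA_TYPE_DATASET = 'Dataset'

def _is_aggregation_sketch(f):
    if f['type'] == SA_TYPE_FILE:
        return '.aggregation' in f.get('title')
    elif f['type'] == SA_TYPE_DATASET:
        return '.aggregation.' in f.get('instance_id')
    else:
        raise ValueError('Incorrect type (%s)' % f['type'])

assert _is_aggregation_sketch({'type': SA_TYPE_FILE, 'title': 'cmip5.output1.BCC.bcc-csm1-1-m.rcp26.day.atmos.day.r1i1p1.clt.20120910.aggregation'})
assert _is_aggregation_sketch({'type': SA_TYPE_DATASET, 'instance_id': 'obs4MIPs.LOA_IPSL.PARASOL.day.parasolRefl.1.aggregation.8'})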
    # build the 'seen' data structure (a dict mapping each functional id to a 'seen' flag)
    seen = dict((f[functional_id_keyname], False) for f in light_metadata.get_files()) # warning: loads the whole list in memory

    sdlog.info("SYNDRMDR-002", "Perform duplicate and replicate suppression..")

    po = sdpipelineprocessing.ProcessingObject(remove, functional_id_keyname, seen)
    metadata = sdpipelineprocessing.run_pipeline(metadata, po)

    return metadata

def remove(files, functional_id_keyname, seen):
    new_files = []

    for f in files:
        uniq_id = f[functional_id_keyname]
        if not seen[uniq_id]:
            new_files.append(f)
            seen[uniq_id] = True # mark as seen so later duplicates are excluded (the first item wins)

    return new_files

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    files = json.load(sys.stdin)
    files = run(files)
    sdprint.print_format(files, args.format, args.print_only_one_item)
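# Usage illustration for remove() above: the shared 'seen' dict makes the first
# occurrence win across successive pipeline chunks (sample ids are made up).
_seen = {'id-a': False, 'id-b': False}
_chunk1 = [{'instance_id': 'id-a'}, {'instance_id': 'id-b'}, {'instance_id': 'id-a'}]
_chunk2 = [{'instance_id': 'id-b'}]
assert remove(_chunk1, 'instance_id', _seen) == [{'instance_id': 'id-a'}, {'instance_id': 'id-b'}]
assert remove(_chunk2, 'instance_id', _seen) == [] # already seen in an earlier chunk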
def run(facets_groups):
    for facets_group in facets_groups:
        sdtranslate.translate_value(facets_group, value_rules)

    return facets_groups

value_rules = {
    'model': {
        'inmcm4':        'INM-CM4',
        'bcc-csm1-1':    'BCC-CSM1-1',
        'bcc-csm1-1-m':  'BCC-CSM1-1-m',
        'GFDL-CM2p1':    'GFDL-CM2-1'
    },
    'institute': {
        'CCCma': 'CCCMA'
    }
}

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    facets_groups = json.load(sys.stdin)
    facets_groups = run(facets_groups)
    sdprint.print_format(facets_groups, args.format, args.print_only_one_item)
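# Illustration (not sdtranslate's actual code): a minimal stand-in for translate_value,
# assuming each facets_group maps a facet name to a list of values.
def _translate_value_sketch(facets_group, rules):
    for facet, mapping in rules.items():
        if facet in facets_group:
            facets_group[facet] = [mapping.get(v, v) for v in facets_group[facet]]

_fg = {'model': ['inmcm4', 'HadGEM2-ES']}
_translate_value_sketch(_fg, value_rules)
assert _fg['model'] == ['INM-CM4', 'HadGEM2-ES'] # known value renamed, unknown left as-is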
    Initially designed to batch-update an attribute in the Synda database (e.g. when a
    new attribute is to be stored in Synda, the metadata of every already-downloaded
    file must be updated).
    """
    stream = sdstreamutils.get_stream(parameter=parameter, selection_file=selection_file, no_default=no_default)

    sddeferredafter.add_forced_parameter(stream, 'replica', 'false')
    sddeferredafter.add_forced_parameter(stream, 'type', type_)

    assert fields is not None
    sddeferredafter.add_forced_parameter(stream, 'fields', fields)

    metadata = sdsearch.run(stream=stream, post_pipeline_mode=None, dry_run=dry_run, playback=playback, record=record)

    return metadata.get_files()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('parameter', nargs='*', default=[])
    parser.add_argument('-f', '--fields', default=sdfields.get_sample_fields())
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='indent')
    parser.add_argument('-y', '--dry_run', action='store_true')
    args = parser.parse_args()

    files = dump_ESGF(parameter=args.parameter, fields=args.fields, dry_run=args.dry_run)

    if not args.dry_run:
        sdprint.print_format(files, args.format)
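# Companion sketch to the 'default parameter' illustration earlier: a *forced* parameter
# overrides whatever the user set, instead of only filling a gap. This is an assumption
# about sddeferredafter's semantics, not its actual code.
def _add_forced_parameter_sketch(stream, name, value):
    for facets_group in stream:
        facets_group[name] = [value] # unconditional override

_stream = [{'replica': ['true']}]
_add_forced_parameter_sketch(_stream, 'replica', 'false')
assert _stream[0]['replica'] == ['false'] # user value replaced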
parser.add_argument('-f', '--file', default=None)
parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
parser.add_argument('-i', '--index_host')
parser.add_argument('-m', '--post_pipeline_mode', default='file', choices=sdconst.POST_PIPELINE_MODES)
parser.add_argument('-y', '--dry_run', action='store_true')
parser.add_argument('-1', '--print_only_one_item', action='store_true')
sdcommonarg.add_playback_record_options(parser)

parser.add_argument('--load-default', dest='load_default', action='store_true')
parser.add_argument('--no-load-default', dest='load_default', action='store_false')
parser.set_defaults(load_default=None)

parser.add_argument('--parallel', dest='parallel', action='store_true')
parser.add_argument('--no-parallel', dest='parallel', action='store_false')
parser.set_defaults(parallel=True)

args = parser.parse_args()

metadata = run(path=args.file,
               parameter=args.parameter,
               post_pipeline_mode=args.post_pipeline_mode,
               dry_run=args.dry_run,
               load_default=args.load_default,
               parallel=args.parallel,
               playback=args.playback,
               record=args.record)

if not args.dry_run:
    sdprint.print_format(metadata.get_files(), args.format, args.print_only_one_item) # warning: loads the whole list in memory
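# Note on the paired flags above: registering --X/--no-X on the same dest with
# set_defaults(dest=None) yields a tri-state option (None = let the callee decide,
# True/False = explicit user choice). Self-contained demonstration:
import argparse

_p = argparse.ArgumentParser()
_p.add_argument('--load-default', dest='load_default', action='store_true')
_p.add_argument('--no-load-default', dest='load_default', action='store_false')
_p.set_defaults(load_default=None)

assert _p.parse_args([]).load_default is None
assert _p.parse_args(['--load-default']).load_default is True
assert _p.parse_args(['--no-load-default']).load_default is False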
##################################

"""This module filters a columns list."""

import sys
import argparse
import json
import sdprint

def run(files, key_list_to_keep):
    if len(key_list_to_keep) == 0:
        return files
    else:
        new_list = []
        for f in files:
            new_list.append(dict((k, f[k]) for k in f if k in key_list_to_keep))
        return new_list

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-C', '--column', type=lambda s: s.split(','), default=[], help="set column(s) to keep")
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    files = json.load(sys.stdin)
    files = run(files, args.column)
    sdprint.print_format(files, args.format)
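# Usage illustration for run() above (sample rows are made up):
_files = [{'title': 'tas.nc', 'size': 42, 'checksum': 'abc'}]
assert run(_files, ['title', 'size']) == [{'title': 'tas.nc', 'size': 42}]
assert run(_files, []) == _files # empty keep-list means 'keep everything'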
    Example
        input
            name='variable'
            values=['tasmin','tasmax']
        output
            "(variable='tasmin' OR variable='tasmax')"
    """
    l = []
    for v in values:
        l.append("%s='%s'" % (name, v))

    if len(l) > 0:
        buf = " OR ".join(l)
        return "(%s)" % buf
    else:
        return ""

# init.

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='-', help='Facets groups file')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    facets_groups = sdpipelineutils.get_input_data(args.file)
    queries = run(facets_groups)
    sdprint.print_format(queries, args.format, args.print_only_one_item)
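# Self-contained restatement of the serialization body above, under a hypothetical name
# (the original def line is not shown in this excerpt):
def _serialize_sketch(name, values):
    l = ["%s='%s'" % (name, v) for v in values]
    return "(%s)" % " OR ".join(l) if l else ""

assert _serialize_sketch('variable', ['tasmin', 'tasmax']) == "(variable='tasmin' OR variable='tasmax')"
assert _serialize_sketch('variable', []) == ""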
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

output_dir = '/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('selection_file', nargs='?', default='-', help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    queries = sdpipeline.build_queries(path=args.selection_file)

    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host]) # keep the parallel machinery, but query only one index at a time
        metadata = sdrun.run(queries)
        metadata = sdpipeline.post_pipeline(metadata, 'generic') # exclude malformed files, if any

        with open('%s/%s' % (output_dir, index_host), 'w') as fh:
            sdprint.print_format(metadata.get_files(), 'line', fh=fh)
def transform_facets_for_dataset_attrs_retrieval(facets):
    """Force attributes for dataset attrs retrieval."""

    # do not alter the original facets object
    facets_cpy = copy.deepcopy(facets)

    facets_cpy['type'] = ['Dataset']
    facets_cpy['fields'] = ['*']

    return facets_cpy

# init.

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='-', help='Facets groups file')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    facets_groups = sdpipelineutils.get_input_data(args.file)
    queries = run(facets_groups)
    sdprint.print_format(queries, args.format, args.print_only_one_item)
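# Usage illustration for the deepcopy pattern above (the function relies on the
# module's 'import copy', added here so the snippet stands alone; sample facets are
# made up):
import copy

_facets = {'type': ['File'], 'project': ['CMIP5']}
_new = transform_facets_for_dataset_attrs_retrieval(_facets)
assert _facets['type'] == ['File'] # original left untouched
assert _new['type'] == ['Dataset'] and _new['fields'] == ['*']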
    return mqr

def ws_call(query):
    request = sdtypes.Request(url=query['url'], pagination=False)
    result = sdnetutils.call_web_service(request.get_url(), timeout=sdconst.SEARCH_API_HTTP_TIMEOUT) # returns a Response object

    if result.count() >= sdconst.SEARCH_API_CHUNKSIZE:
        raise SDException("SDQSEARC-002", "Number of returned files reached the maximum limit")

    result = sdaddap.run(result, query.get('attached_parameters', {}))

    return result

if __name__ == '__main__':
    prog = os.path.basename(__file__)
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog="""examples of use\n%s""" % sdcliex.search(prog))
    parser.add_argument('parameter', nargs='*', default=[], help=sdi18n.m0001)
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='indent')
    parser.add_argument('-i', '--index_host')
    parser.add_argument('-m', '--post_pipeline_mode', default='file')
    parser.add_argument('-y', '--dry_run', action='store_true')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    result = run(parameter=args.parameter, index_host=args.index_host, post_pipeline_mode=args.post_pipeline_mode, dry_run=args.dry_run)

    sdprint.print_format(result.get_files(), args.format, args.print_only_one_item)
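# Note on the guard in ws_call() above: with pagination disabled, a single call can
# return at most SEARCH_API_CHUNKSIZE records, so hitting that count almost certainly
# means the result set was silently truncated. Minimal sketch (the chunk-size value is
# an assumption for illustration, not sdconst's real one):
_SEARCH_API_CHUNKSIZE = 9000

def _probably_truncated_sketch(returned_count):
    return returned_count >= _SEARCH_API_CHUNKSIZE

assert _probably_truncated_sketch(9000)
assert not _probably_truncated_sketch(120)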