def test_index_hosts(): print "ESGF indexes benchmark" print "======================" print "" ProgressThread.start(running_message='Building test query.. ',spinner_type=0,sleep=0.2,end_message=None) #parameter=get_test_query() parameter=get_random_test_query() parameter.append("limit=0") test_queries=sdpipeline.build_queries(parameter=parameter,index_host='<index_host>',load_default=False) ProgressThread.stop() test_query=test_queries[0] print "Test query" print "----------" print "%s"%test_query['url'] print "" ProgressThread.start(running_message='Test running.. ',spinner_type=0,sleep=0.2,end_message=None) li=[] for index_host in sdindex.index_host_list: result=sdquickcount.run(index_host=index_host,parameter=parameter) li.append([index_host,result.num_found,result.call_duration if result.call_duration>=1 else 0.1]) ProgressThread.stop() print "Result" print "------" li=sorted(li, key=lambda record: record[2]) print tabulate(li,headers=['Index host','File count','Call duration (seconds)'],tablefmt="plain")
def test_index_hosts(): print "ESGF indexes benchmark" print "======================" print "" ProgressThread.start(running_message='Building test query.. ',spinner_type=0,sleep=0.2,end_message=None) #parameter=get_test_query() parameter=get_random_test_query() parameter.append("limit=0") test_queries=sdpipeline.build_queries(parameter=parameter,index_host='<index_host>',load_default=False) ProgressThread.stop() test_query=test_queries[0] print "Test query" print "----------" print "%s"%test_query['url'] print "" ProgressThread.start(running_message='Test running.. ',spinner_type=0,sleep=0.2,end_message=None) li=[] for index_host in sdindex.index_host_list: result=sdquicksearch.run(index_host=index_host,parameter=parameter) li.append([index_host,result.num_found,result.call_duration if result.call_duration>=1 else 0.1]) ProgressThread.stop() print "Result" print "------" li=sorted(li, key=lambda record: record[2]) print tabulate(li,headers=['Index host','File count','Call duration (seconds)'],tablefmt="plain")
def run(stream=None, path=None, parameter=None, dry_run=False, load_default=None):
    if parameter is None:
        parameter = []

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, query_type='local', dry_run=dry_run, load_default=load_default)

    files = []
    for query in queries:
        sqlquery = query['sqlquery']
        ap = query['attached_parameters']
        type_ = sddquery.get_scalar(ap, 'type')  # yes, get_scalar works also on attached_parameters

        if dry_run:
            print sqlquery
        else:
            files.extend(get_files(sqlquery, type_))

    return files
def run(stream=None,selection=None,path=None,parameter=[],post_pipeline_mode='file',parallel=True,index_host=None,dry_run=False,load_default=None):
    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    # Prevent use of the 'limit' keyword (it cannot be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this module. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
    else:
        progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # cast needed as progress can be str (set from parameter) or bool (set programmatically)

        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        files=sdrun.run(squeries,parallel)
        files=sdpipeline.post_pipeline(files,post_pipeline_mode)

        if progress:
            ProgressThread.stop() # spinner stop

        return files

    return []
def run(stream=None, path=None, parameter=None, index_host=None, post_pipeline_mode='file', dry_run=False):
    if parameter is None:
        parameter = []

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, index_host=index_host, parallel=False, load_default=False)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    progress = sdsqueries.get_scalar(queries, 'progress', False, type_=bool)  # cast needed as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host = sdsqueries.get_scalar(queries, 'searchapi_host')

    if dry_run:
        for query in queries:
            request = sdtypes.Request(url=query['url'], pagination=False)

            print '%s' % request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(sdi18n.m0003(searchapi_host))  # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(sleep=0.1, running_message='', end_message='Search completed.')  # spinner start

            mqr = process_queries(queries)
            metadata = mqr.to_metadata()

            sdlog.debug("SDQSEARC-002", "files-count=%d" % metadata.count())
            metadata = sdpipeline.post_pipeline(metadata, post_pipeline_mode)
            sdlog.debug("SDQSEARC-004", "files-count=%d" % metadata.count())

            return metadata
        finally:
            if progress:
                ProgressThread.stop()  # spinner stop
def run(stream=None, path=None, parameter=[], index_host=None, dry_run=False, type_=sdconst.SA_TYPE_DATASET):
    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', type_)
    else:
        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter, 'type', type_)

    queries = sdpipeline.build_queries(stream=stream, path=path, parameter=parameter, index_host=index_host, parallel=False, load_default=False, count=True)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # multiple queries are not supported because of possible duplicates/intersections between queries
    # (i.e. it would be unclear which query's num_found attribute to use)
    if len(queries) > 1:
        raise SDException("SDQSEARC-100", "Too many queries (multi-query is not allowed in this module, use sdquicksearch instead)")

    query = queries[0]

    if dry_run:
        request = sdtypes.Request(url=query['url'], pagination=False)

        print '%s' % request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query)  # returns a Response object
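# Hypothetical usage sketch (assumption: the function above is sdquickcount.run; with the
# default type_, the count applies to datasets, and the returned Response object exposes
# 'num_found' and 'call_duration', as read in test_index_hosts above). The facet values
# are illustrative only.
import sdquickcount

result = sdquickcount.run(parameter=['project=CMIP5', 'model=CNRM-CM5'])
print 'num_found=%d, call_duration=%.1fs' % (result.num_found, result.call_duration)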
def run(stream=None,path=None,parameter=[],dry_run=False,load_default=None):
    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,query_type='local',dry_run=dry_run,load_default=load_default)

    files=[]
    for query in queries:
        sqlquery=query['sqlquery']
        ap=query['attached_parameters']
        type_=sddquery.get_scalar(ap,'type') # yes, get_scalar works also on attached_parameters

        if dry_run:
            print sqlquery
        else:
            files.extend(get_files(sqlquery,type_))

    return files
def run(stream=None,selection=None,path=None,parameter=[],post_pipeline_mode='file',parallel=True,index_host=None,dry_run=False,load_default=None):
    """Note: squeries means 'Serialized queries'."""
    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    action=sdsqueries.get_scalar(squeries,'action',None)
    progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # cast needed as progress can be str (set from parameter) or bool (set programmatically)

    # Prevent use of the 'limit' keyword (it cannot be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this mode. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
    else:
        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        # retrieve files
        files=sdrun.run(squeries,parallel)

        # post-processing
        files=sdpipeline.post_pipeline(files,post_pipeline_mode)

        # HACK: second run to retrieve dataset timestamps in one row
        #
        # MEMO: when action is 'install', type is always 'File' (i.e. this code gets executed only for type=File)
        #
        if action is not None:
            if action=='install':
                files=sdbatchtimestamp.add_dataset_timestamp(squeries,files,parallel)

        if progress:
            ProgressThread.stop() # spinner stop

        return files

    return []
def run(stream=None, selection=None, path=None, parameter=None, post_pipeline_mode='file', parallel=sdconfig.metadata_parallel_download, index_host=None, dry_run=False, load_default=None, playback=None, record=None):
    """Note: squeries means 'Serialized queries'."""
    if parameter is None:
        parameter=[]

    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    action=sdsqueries.get_scalar(squeries,'action',None)
    progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # cast needed as progress can be str (set from parameter) or bool (set programmatically)

    # Prevent use of the 'limit' keyword (it cannot be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this mode. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
        return sdtypes.Metadata()
    else:
        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        metadata=_get_files(squeries,parallel,post_pipeline_mode,action,playback,record)

        if progress:
            ProgressThread.stop() # spinner stop

        return metadata
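# Hypothetical usage sketch (assumption: the function above is sdsearch.run and the returned
# Metadata object exposes count()). The 'limit' facet is rejected by this module (see the
# check above), so the query below relies on the built-in pagination; the facet values are
# illustrative only.
import sdsearch

metadata = sdsearch.run(parameter=['project=CMIP5', 'experiment=historical', 'variable=tas'])
print 'retrieved %d file(s)' % metadata.count()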
def run(stream=None,path=None,parameter=None,index_host=None,post_pipeline_mode='file',dry_run=False):
    if parameter is None:
        parameter=[]

    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    progress=sdsqueries.get_scalar(queries,'progress',False,type_=bool) # cast needed as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host=sdsqueries.get_scalar(queries,'searchapi_host')

    if dry_run:
        for query in queries:
            request=sdtypes.Request(url=query['url'],pagination=False)

            print '%s'%request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(sdi18n.m0003(searchapi_host)) # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

            mqr=process_queries(queries)
            metadata=mqr.to_metadata()

            sdlog.debug("SDQSEARC-002","files-count=%d"%metadata.count())
            metadata=sdpipeline.post_pipeline(metadata,post_pipeline_mode)
            sdlog.debug("SDQSEARC-004","files-count=%d"%metadata.count())

            return metadata
        finally:
            if progress:
                ProgressThread.stop() # spinner stop
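# Hypothetical usage sketch (assumption: the function above is sdquicksearch.run and the
# returned Metadata object exposes count() and get_files(), as used elsewhere in this code;
# 'title' is assumed to be present in the per-file dicts returned by the search API). Unlike
# sdsearch, the 'limit' facet is allowed here.
import sdquicksearch

metadata = sdquicksearch.run(parameter=['project=CMIP5', 'variable=tas', 'limit=5'])
print '%d file(s) found' % metadata.count()
for f in metadata.get_files():
    print f.get('title')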
def run(stream=None,path=None,parameter=[],index_host=None,post_pipeline_mode='file',dry_run=False,count=False):
    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False,count=count)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    progress=sdsqueries.get_scalar(queries,'progress',False,type_=bool) # cast needed as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host=sdsqueries.get_scalar(queries,'searchapi_host')

    if dry_run:
        for query in queries:
            request=Request(url=query['url'],pagination=False)

            print '%s'%request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(sdi18n.m0003(searchapi_host)) # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

            result=process_queries(queries) # returns a Response object

            # post-call processing
            result.files=sdpipeline.post_pipeline(result.files,post_pipeline_mode)

            # Sync object attributes (maybe not the best place to do it). sdpipeline.post_pipeline()
            # is likely to change the number of items in 'files' without updating the corresponding
            # 'num_result' attribute, so we update it here.
            result.num_result=len(result.files)

            return result
        finally:
            if progress:
                ProgressThread.stop() # spinner stop
def run(stream=None,path=None,parameter=[],index_host=None,dry_run=False,type_=sdconst.SA_TYPE_DATASET):
    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream,'type',type_)
    else:
        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter,'type',type_)

    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False,count=True)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    # multiple queries are not supported because of possible duplicates/intersections between queries
    # (i.e. it would be unclear which query's num_found attribute to use)
    if len(queries)>1:
        raise SDException("SDQSEARC-100","Too many queries (multi-query is not allowed in this module, use sdquicksearch instead)")

    query=queries[0]

    if dry_run:
        request=sdtypes.Request(url=query['url'],pagination=False)

        print '%s'%request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query) # returns a Response object
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

output_dir='/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('selection_file',nargs='?',default='-',help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    queries=sdpipeline.build_queries(path=args.selection_file)

    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host]) # use the parallel machinery, but against a single index at a time

        metadata=sdrun.run(queries)
        metadata=sdpipeline.post_pipeline(metadata,'generic') # exclude malformed files, if any

        with open('%s/%s'%(output_dir,index_host),'w') as fh:
            sdprint.print_format(metadata.get_files(),'line',fh=fh)
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

output_dir='/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file',nargs='?',default='-',help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    queries=sdpipeline.build_queries(path=args.file)

    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host]) # use the parallel machinery, but against a single index at a time

        files=sdrun.run(queries)
        files=sdpipeline.post_pipeline(files,'generic') # exclude malformed files, if any

        with open('%s/%s'%(output_dir,index_host),'w') as fh:
            sdprint.print_format(files,'line',fh=fh)
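# Hypothetical usage note (the script name 'sdcmpindexes.py' is an assumption based on the
# output directory used above). Each index host gets one file listing under /tmp/sdcmpindexes,
# which can then be compared pairwise to spot discrepancies between indexes, e.g.:
#
#   python sdcmpindexes.py my_selection.txt
#   diff /tmp/sdcmpindexes/<index_host_A> /tmp/sdcmpindexes/<index_host_B>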