Example #1
def test_index_hosts():
    print "ESGF indexes benchmark"
    print "======================"
    print ""

    ProgressThread.start(running_message='Building test query.. ',spinner_type=0,sleep=0.2,end_message=None)

    #parameter=get_test_query()
    parameter=get_random_test_query()

    parameter.append("limit=0")

    test_queries=sdpipeline.build_queries(parameter=parameter,index_host='<index_host>',load_default=False)

    ProgressThread.stop()

    test_query=test_queries[0]
    print "Test query"
    print "----------"
    print "%s"%test_query['url']
    print ""

    ProgressThread.start(running_message='Test running.. ',spinner_type=0,sleep=0.2,end_message=None)

    li=[]
    for index_host in sdindex.index_host_list:
        result=sdquickcount.run(index_host=index_host,parameter=parameter)
        li.append([index_host,result.num_found,result.call_duration if result.call_duration>=1 else 0.1])

    ProgressThread.stop()
    print "Result"
    print "------"
    li=sorted(li, key=lambda record: record[2])
    print tabulate(li,headers=['Index host','File count','Call duration (seconds)'],tablefmt="plain")
Example #2
def test_index_hosts():
    print "ESGF indexes benchmark"
    print "======================"
    print ""

    ProgressThread.start(running_message='Building test query.. ',spinner_type=0,sleep=0.2,end_message=None)

    #parameter=get_test_query()
    parameter=get_random_test_query()

    parameter.append("limit=0")

    test_queries=sdpipeline.build_queries(parameter=parameter,index_host='<index_host>',load_default=False)

    ProgressThread.stop()

    test_query=test_queries[0]
    print "Test query"
    print "----------"
    print "%s"%test_query['url']
    print ""

    ProgressThread.start(running_message='Test running.. ',spinner_type=0,sleep=0.2,end_message=None)

    li=[]
    for index_host in sdindex.index_host_list:
        result=sdquicksearch.run(index_host=index_host,parameter=parameter)
        li.append([index_host,result.num_found,result.call_duration if result.call_duration>=1 else 0.1])

    ProgressThread.stop()
    print "Result"
    print "------"
    li=sorted(li, key=lambda record: record[2])
    print tabulate(li,headers=['Index host','File count','Call duration (seconds)'],tablefmt="plain")
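
Examples #1 and #2 are identical except for the per-host call (sdquickcount.run vs sdquicksearch.run); the timing, sorting and table output are the same. The following minimal sketch reproduces that shared benchmark pattern outside of synda: the host list and the counting function are placeholders rather than real synda APIs, and only the tabulate library is assumed to be installed.

import time
from tabulate import tabulate

def fake_count(host):
    # placeholder standing in for sdquickcount.run() / sdquicksearch.run()
    time.sleep(0.05)
    return 42  # pretend 'num_found'

hosts = ['esgf-index1.example.org', 'esgf-index2.example.org']

rows = []
for host in hosts:
    start = time.time()
    num_found = fake_count(host)
    duration = time.time() - start
    rows.append([host, num_found, duration if duration >= 1 else 0.1])  # floor sub-second calls at 0.1, as in the examples above

rows = sorted(rows, key=lambda record: record[2])
print(tabulate(rows, headers=['Index host', 'File count', 'Call duration (seconds)'], tablefmt="plain"))
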
Example #3
def run(stream=None,
        path=None,
        parameter=None,
        dry_run=False,
        load_default=None):

    if parameter is None:
        parameter = []

    queries = sdpipeline.build_queries(stream=stream,
                                       path=path,
                                       parameter=parameter,
                                       query_type='local',
                                       dry_run=dry_run,
                                       load_default=load_default)

    files = []
    for query in queries:
        sqlquery = query['sqlquery']
        ap = query['attached_parameters']
        type_ = sddquery.get_scalar(
            ap, 'type')  # yes, get_scalar works also on attached_parameters

        if dry_run:
            print sqlquery
        else:
            files.extend(get_files(sqlquery, type_))

    return files
Example #4
File: sdsearch.py Project: ncarenton/synda
def run(stream=None,selection=None,path=None,parameter=[],post_pipeline_mode='file',parallel=True,index_host=None,dry_run=False,load_default=None):
    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    # Prevent use of 'limit' keyword ('limit' keyword can't be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this module. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
    else:
        progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # we cast here as progress can be str (set from parameter) or bool (set programmatically)
        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        files=sdrun.run(squeries,parallel)
        files=sdpipeline.post_pipeline(files,post_pipeline_mode)

        if progress:
            ProgressThread.stop() # spinner stop

        return files

    return []
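
A hedged usage sketch for the run() function above (sdsearch.py). It assumes the synda modules are importable and that the facet strings are accepted by the ESGF search API; the facet values themselves are purely illustrative.

import sdsearch

# dry_run=True prints the generated queries instead of executing them
sdsearch.run(parameter=['project=CMIP5', 'variable=tas'], dry_run=True)

# a regular call runs the queries and returns the post-processed file list
files = sdsearch.run(parameter=['project=CMIP5', 'variable=tas'])
print('%d file(s) found' % len(files))
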
Example #5
def run(stream=None,
        path=None,
        parameter=None,
        index_host=None,
        post_pipeline_mode='file',
        dry_run=False):

    if parameter is None:
        parameter = []

    queries = sdpipeline.build_queries(stream=stream,
                                       path=path,
                                       parameter=parameter,
                                       index_host=index_host,
                                       parallel=False,
                                       load_default=False)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    progress = sdsqueries.get_scalar(
        queries, 'progress', False, type_=bool
    )  # we cast here as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host = sdsqueries.get_scalar(queries, 'searchapi_host')

    if dry_run:
        for query in queries:
            request = sdtypes.Request(url=query['url'], pagination=False)

            print '%s' % request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(
                    sdi18n.m0003(searchapi_host)
                )  # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(
                    sleep=0.1,
                    running_message='',
                    end_message='Search completed.')  # spinner start

            mqr = process_queries(queries)
            metadata = mqr.to_metadata()

            sdlog.debug("SDQSEARC-002", "files-count=%d" % metadata.count())
            metadata = sdpipeline.post_pipeline(metadata, post_pipeline_mode)
            sdlog.debug("SDQSEARC-004", "files-count=%d" % metadata.count())

            return metadata
        finally:
            if progress:
                ProgressThread.stop()  # spinner stop
Example #6
def run(stream=None,
        path=None,
        parameter=[],
        index_host=None,
        dry_run=False,
        type_=sdconst.SA_TYPE_DATASET):

    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream, 'type', type_)
    else:

        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter, 'type', type_)

    queries = sdpipeline.build_queries(stream=stream,
                                       path=path,
                                       parameter=parameter,
                                       index_host=index_host,
                                       parallel=False,
                                       load_default=False,
                                       count=True)

    if len(queries) < 1:
        raise SDException("SDQSEARC-001", "No query to process")

    # we don't support multiple queries because of duplicates/intersections between queries
    # (i.e. it would be ambiguous which query's num_found attribute to use)
    if len(queries) > 1:
        raise SDException(
            "SDQSEARC-100",
            "Too many queries (multi-query is not allowed in this module; use sdquicksearch instead)"
        )

    query = queries[0]

    if dry_run:
        request = sdtypes.Request(url=query['url'], pagination=False)

        print '%s' % request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query)  # return Response object
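
Example #1 consumes a function of this shape through sdquickcount.run() and reads num_found and call_duration from the returned Response. A minimal hedged sketch of that usage, assuming this run() is the one exposed by the sdquickcount module; the host name and facet value are illustrative only.

import sdquickcount

# count how many datasets (the default type_) match a facet on a given index (hypothetical host name)
response = sdquickcount.run(index_host='esgf-node.example.org', parameter=['project=CMIP5'])
print('num_found=%d call_duration=%s' % (response.num_found, response.call_duration))
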
Example #7
def run(stream=None,path=None,parameter=[],dry_run=False,load_default=None):
    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,query_type='local',dry_run=dry_run,load_default=load_default)

    files=[]
    for query in queries:
        sqlquery=query['sqlquery']
        ap=query['attached_parameters']
        type_=sddquery.get_scalar(ap,'type') # yes, get_scalar works also on attached_parameters

        if dry_run:
            print sqlquery
        else:
            files.extend(get_files(sqlquery,type_))

    return files
Example #8
def run(stream=None,selection=None,path=None,parameter=[],post_pipeline_mode='file',parallel=True,index_host=None,dry_run=False,load_default=None):
    """
    Note
        squeries means 'Serialized queries'
    """

    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    action=sdsqueries.get_scalar(squeries,'action',None)
    progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # we cast here as progress can be str (set from parameter) or bool (set programmatically)

    # Prevent use of 'limit' keyword ('limit' keyword can't be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this mode. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
    else:
        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        # retrieve files
        files=sdrun.run(squeries,parallel)

        # post-processing
        files=sdpipeline.post_pipeline(files,post_pipeline_mode)


        # HACK
        #
        # second run to retrieve dataset timestamps in one row
        #
        # MEMO: when action is 'install', type is always 'File' (i.e. this code gets executed only for type=File)
        #
        if action is not None:
            if action=='install':
                files=sdbatchtimestamp.add_dataset_timestamp(squeries,files,parallel)


        if progress:
            ProgressThread.stop() # spinner stop

        return files

    return []
Example #9
def run(stream=None,
        selection=None,
        path=None,
        parameter=None,
        post_pipeline_mode='file',
        parallel=sdconfig.metadata_parallel_download,
        index_host=None,
        dry_run=False,
        load_default=None,
        playback=None,
        record=None):
    """
    Note
        squeries means 'Serialized queries'
    """

    if parameter is None:
        parameter=[]

    squeries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,selection=selection,parallel=parallel,index_host=index_host,dry_run=dry_run,load_default=load_default)

    action=sdsqueries.get_scalar(squeries,'action',None)
    progress=sdsqueries.get_scalar(squeries,'progress',False,type_=bool) # we cast here as progress can be str (set from parameter) or bool (set programmatically)

    # Prevent use of 'limit' keyword ('limit' keyword can't be used in this module because it interferes with the pagination system)
    for q in squeries:
        if sdtools.url_contains_limit_keyword(q['url']):
            raise SDException('SDSEARCH-001',"'limit' facet is not supported in this mode. Use 'sdquicksearch' module instead.")

    if dry_run:
        sdsqueries.print_(squeries)
        return sdtypes.Metadata()
    else:
        if progress:
            #sdtools.print_stderr(sdi18n.m0003(ap.get('searchapi_host'))) # waiting message
            ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

        metadata=_get_files(squeries,parallel,post_pipeline_mode,action,playback,record)

        if progress:
            ProgressThread.stop() # spinner stop

        return metadata
Example #10
def run(stream=None,path=None,parameter=None,index_host=None,post_pipeline_mode='file',dry_run=False):

    if parameter is None:
        parameter=[]

    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    progress=sdsqueries.get_scalar(queries,'progress',False,type_=bool) # we cast here as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host=sdsqueries.get_scalar(queries,'searchapi_host')


    if dry_run:
        for query in queries:
            request=sdtypes.Request(url=query['url'],pagination=False)

            print '%s'%request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(sdi18n.m0003(searchapi_host)) # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

            mqr=process_queries(queries)
            metadata=mqr.to_metadata()

            sdlog.debug("SDQSEARC-002","files-count=%d"%metadata.count())
            metadata=sdpipeline.post_pipeline(metadata,post_pipeline_mode)
            sdlog.debug("SDQSEARC-004","files-count=%d"%metadata.count())

            return metadata
        finally:
            if progress:
                ProgressThread.stop() # spinner stop
Example #11
def run(stream=None,path=None,parameter=[],index_host=None,post_pipeline_mode='file',dry_run=False,count=False):
    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False,count=count)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    progress=sdsqueries.get_scalar(queries,'progress',False,type_=bool) # we cast here as progress can be str (set from parameter) or bool (set programmatically)
    searchapi_host=sdsqueries.get_scalar(queries,'searchapi_host')


    if dry_run:
        for query in queries:
            request=Request(url=query['url'],pagination=False)

            print '%s'%request.get_url()

            # debug
            #print 'Url: %s'%request.get_url()
            #print 'Attached parameters: %s'%query.get('attached_parameters')

        return Response()
    else:
        try:
            if progress:
                sdtools.print_stderr(sdi18n.m0003(searchapi_host)) # waiting message => TODO: move into ProgressThread class
                ProgressThread.start(sleep=0.1,running_message='',end_message='Search completed.') # spinner start

            result=process_queries(queries) # return Response object

            # post-call-processing
            result.files=sdpipeline.post_pipeline(result.files,post_pipeline_mode)
            result.num_result=len(result.files) # sync object attributes (maybe not the best place to do it): sdpipeline.post_pipeline() is likely to change the number of items in 'files' without updating the corresponding 'num_result' attribute, so we update it here.

            return result
        finally:
            if progress:
                ProgressThread.stop() # spinner stop
Example #12
def run(stream=None,path=None,parameter=[],index_host=None,dry_run=False,type_=sdconst.SA_TYPE_DATASET):


    # type management
    if stream is not None:
        sddeferredbefore.add_forced_parameter(stream,'type',type_)
    else:

        # if stream is None, we assume 'parameter' mode
        # (see TAGJFJ4R4JKFFJD for more information)
        sddeferredbefore.add_forced_parameter(parameter,'type',type_)


    queries=sdpipeline.build_queries(stream=stream,path=path,parameter=parameter,index_host=index_host,parallel=False,load_default=False,count=True)

    if len(queries)<1:
        raise SDException("SDQSEARC-001","No query to process")

    # we don't support multiple queries because of duplicates/intersections between queries
    # (i.e. it would be ambiguous which query's num_found attribute to use)
    if len(queries)>1:
        raise SDException("SDQSEARC-100","Too many queries (multi-query is not allowed in this module; use sdquicksearch instead)")

    query=queries[0]

    if dry_run:
        request=sdtypes.Request(url=query['url'],pagination=False)

        print '%s'%request.get_url()

        # debug
        #print 'Url: %s'%request.get_url()
        #print 'Attached parameters: %s'%query.get('attached_parameters')

        return sdtypes.Response()
    else:
        return ws_call(query) # return Response object
Example #13
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

output_dir='/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('selection_file',nargs='?',default='-',help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir) 

    queries=sdpipeline.build_queries(path=args.selection_file)

    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host]) # run in parallel mode, but against a single index host
        metadata=sdrun.run(queries)
        metadata=sdpipeline.post_pipeline(metadata,'generic') # this is to exclude malformed files if any

        with open('%s/%s'%(output_dir,index_host),'w') as fh:
            sdprint.print_format(metadata.get_files(),'line',fh=fh)
Example #14
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

output_dir='/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file',nargs='?',default='-',help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir) 

    queries=sdpipeline.build_queries(path=args.file)

    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host]) # run in parallel mode, but against a single index host
        files=sdrun.run(queries)
        files=sdpipeline.post_pipeline(files,'generic') # this is to exclude malformed files if any

        with open('%s/%s'%(output_dir,index_host),'w') as fh:
            sdprint.print_format(files,'line',fh=fh)
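
Both scripts above end with one dump file per index host under /tmp/sdcmpindexes. A small follow-up sketch, using only the standard library and assuming that directory layout, compares the dumps pairwise to spot index hosts whose answers differ.

import os
import itertools
import filecmp

output_dir = '/tmp/sdcmpindexes'

dumps = sorted(os.listdir(output_dir))
for a, b in itertools.combinations(dumps, 2):
    same = filecmp.cmp(os.path.join(output_dir, a), os.path.join(output_dir, b), shallow=False)
    print('%s vs %s: %s' % (a, b, 'identical' if same else 'DIFFER'))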