コード例 #1
0
ファイル: sdtsaction.py プロジェクト: nedclimaterisk/synda
def file_dump(args):
    """Dump the files matching the search stream to stdout.

    Raw mode skips the post-processing pipeline and implies '--all';
    the normal path runs the 'file' pipeline and strips non-essential
    attributes unless '--all' was given.
    """
    import sdrfile, sddeferredafter, sdcolumnfilter, sdreducecol

    # Apply the configured default result limit unless the stream already has one.
    sddeferredafter.add_default_parameter(args.stream, 'limit',
                                          sdconfig.get_default_limit('dump'))

    if args.raw_mode:
        # we force '--all' option when '--raw_mode' option is set
        args.all = True
        post_pipeline_mode = None
    else:
        post_pipeline_mode = 'file'

    files = sdrfile.get_files(stream=args.stream,
                              post_pipeline_mode=post_pipeline_mode,
                              dry_run=args.dry_run)

    if not args.all:
        # hide non essential attributes
        files = sdreducecol.run(files)

    if args.dry_run:
        return  # nothing fetched, nothing to print

    if files:
        files = sdcolumnfilter.run(files, args.column)
        sdprint.print_format(files, args.format)
    else:
        print_stderr("File not found")
コード例 #2
0
ファイル: sdtsaction.py プロジェクト: lukaszlacinski/synda
def file_dump(args):
    """Print the files matching the search stream.

    Raw mode bypasses the post-processing pipeline and forces '--all';
    otherwise the 'file' pipeline runs and non-essential attributes are
    dropped unless '--all' was requested.
    """
    import sdrfile, sddeferredafter, sdcolumnfilter, sdreducecol

    # Default result cap when the stream carries no explicit limit.
    sddeferredafter.add_default_parameter(args.stream, 'limit', 100)

    if args.raw_mode:
        # we force '--all' option when '--raw_mode' option is set
        args.all = True
        post_pipeline_mode = None
    else:
        post_pipeline_mode = 'file'

    files = sdrfile.get_files(stream=args.stream,
                              post_pipeline_mode=post_pipeline_mode,
                              dry_run=args.dry_run)

    if not args.all:
        # hide non essential attributes
        files = sdreducecol.run(files)

    if not args.dry_run:
        if files:
            files = sdcolumnfilter.run(files, args.column)
            sdprint.print_format(files, args.format)
        else:
            print_stderr("File not found")
コード例 #3
0
ファイル: sdtsaction.py プロジェクト: lukaszlacinski/synda
def dataset_dump(args):
    """Print the datasets matching the search stream."""
    import sdrdataset, sddeferredafter, sdcolumnfilter

    # Default result cap when the stream carries no explicit limit.
    sddeferredafter.add_default_parameter(args.stream, 'limit', 100)

    if args.raw_mode:
        post_pipeline_mode = None  # raw mode: skip post-processing
    else:
        post_pipeline_mode = 'dataset'

    files = sdrdataset.get_datasets(stream=args.stream,
                                    post_pipeline_mode=post_pipeline_mode,
                                    dry_run=args.dry_run)

    if args.dry_run:
        return  # nothing fetched, nothing to print

    if files:
        files = sdcolumnfilter.run(files, args.column)
        sdprint.print_format(files, args.format)
    else:
        print_stderr('Dataset not found')
コード例 #4
0
ファイル: sdtsaction.py プロジェクト: nedclimaterisk/synda
def dataset_dump(args):
    """Dump the datasets matching the search stream to stdout."""
    import sdrdataset, sddeferredafter, sdcolumnfilter

    # Apply the configured default result limit unless one is already set.
    sddeferredafter.add_default_parameter(args.stream, 'limit',
                                          sdconfig.get_default_limit('dump'))

    if args.raw_mode:
        post_pipeline_mode = None  # raw mode: skip post-processing
    else:
        post_pipeline_mode = 'dataset'

    files = sdrdataset.get_datasets(stream=args.stream,
                                    post_pipeline_mode=post_pipeline_mode,
                                    dry_run=args.dry_run)

    if args.dry_run:
        return  # nothing fetched, nothing to print

    if files:
        files = sdcolumnfilter.run(files, args.column)
        sdprint.print_format(files, args.format)
    else:
        print_stderr('Dataset not found')
コード例 #5
0
        # sample aggregation file
        #  cmip5.output1.BCC.bcc-csm1-1-m.rcp26.day.atmos.day.r1i1p1.clt.20120910.aggregation

        result = ('.aggregation' in f.get("title"))

    elif f["type"] == sdconst.SA_TYPE_DATASET:

        # sample aggregation dataset
        #  obs4MIPs.LOA_IPSL.PARASOL.day.parasolRefl.1.aggregation.8

        result = ('.aggregation.' in f.get("instance_id"))

    else:
        raise SDException('SDRMAGGR-001', 'Incorrect type (%s)' % f["type"])

    return result


if __name__ == '__main__':
    # Filter pipeline entry point: JSON records in on stdin,
    # formatted records out on stdout.
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    records = json.load(sys.stdin)
    records = run(records)
    sdprint.print_format(records, args.format, args.print_only_one_item)
コード例 #6
0
ファイル: sdrmduprep.py プロジェクト: Prodiguer/synda
    # build 'seen' data structure (list of dict => dict (id=>bool))
    seen=dict((f[functional_id_keyname], False) for f in light_metadata.get_files()) # warning: load list in memory


    sdlog.info("SYNDRMDR-002","Perform duplicate and replicate suppression..")

    po=sdpipelineprocessing.ProcessingObject(remove,functional_id_keyname,seen)
    metadata=sdpipelineprocessing.run_pipeline(metadata,po)

    return metadata

def remove(files, functional_id_keyname, seen):
    """Drop duplicate entries from *files*.

    *seen* maps functional id -> bool and is updated in place, so the
    first occurrence of each id wins and duplicates found in later
    calls are filtered out as well.
    """
    kept = []
    for entry in files:
        fid = entry[functional_id_keyname]
        if seen[fid]:
            continue  # already emitted earlier: skip this duplicate
        seen[fid] = True  # mark as seen so other duplicate will be excluded (first item in the loop win)
        kept.append(entry)
    return kept

if __name__ == '__main__':
    # stdin JSON -> duplicate suppression -> formatted stdout.
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    records = run(json.load(sys.stdin))
    sdprint.print_format(records, args.format, args.print_only_one_item)
コード例 #7
0
def run(facets_groups):
    """Normalise facet values in every group using *value_rules*.

    Groups are modified in place; the same list is returned for
    pipeline chaining.
    """
    for group in facets_groups:
        sdtranslate.translate_value(group, value_rules)
    return facets_groups

# Canonical spellings for raw facet values, keyed by facet name.
value_rules = {
    'model': {
        'inmcm4': 'INM-CM4',
        'bcc-csm1-1': 'BCC-CSM1-1',
        'bcc-csm1-1-m': 'BCC-CSM1-1-m',
        'GFDL-CM2p1': 'GFDL-CM2-1',
    },
    'institute': {
        'CCCma': 'CCCMA',
    },
}

if __name__ == '__main__':
    # Translate facet values read from stdin and print them back out.
    parser = argparse.ArgumentParser()
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    groups = run(json.load(sys.stdin))
    sdprint.print_format(groups, args.format, args.print_only_one_item)
コード例 #8
0
ファイル: sddump.py プロジェクト: Prodiguer/synda
    Initially designed to batch update attribute in Synda database
    (e.g. when a new attribute is decided to be stored in Synda,
    all already downloaded files metadata must be updated).
    """
    stream=sdstreamutils.get_stream(parameter=parameter,selection_file=selection_file,no_default=no_default)

    sddeferredafter.add_forced_parameter(stream,'replica','false')

    sddeferredafter.add_forced_parameter(stream,'type',type_)

    assert fields is not None
    sddeferredafter.add_forced_parameter(stream,'fields',fields)

    metadata=sdsearch.run(stream=stream,post_pipeline_mode=None,dry_run=dry_run,playback=playback,record=record)
    return metadata.get_files()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('parameter', nargs='*', default=[])
    parser.add_argument('-f', '--fields', default=sdfields.get_sample_fields())
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='indent')
    parser.add_argument('-y', '--dry_run', action='store_true')
    args = parser.parse_args()

    result = dump_ESGF(parameter=args.parameter,
                       fields=args.fields,
                       dry_run=args.dry_run)

    # In dry-run mode no search was performed, so there is nothing to print.
    if not args.dry_run:
        sdprint.print_format(result, args.format)
コード例 #9
0
ファイル: sdsearch.py プロジェクト: nedclimaterisk/synda
    parser.add_argument('-f','--file',default=None)
    parser.add_argument('-F','--format',choices=sdprint.formats,default='raw')
    parser.add_argument('-i','--index_host')
    parser.add_argument('-m','--post_pipeline_mode',default='file',choices=sdconst.POST_PIPELINE_MODES)
    parser.add_argument('-y','--dry_run',action='store_true')
    parser.add_argument('-1','--print_only_one_item',action='store_true')

    sdcommonarg.add_playback_record_options(parser)

    parser.add_argument('--load-default',dest='load_default',action='store_true')
    parser.add_argument('--no-load-default',dest='load_default',action='store_false')
    parser.set_defaults(load_default=None)

    parser.add_argument('--parallel',dest='parallel',action='store_true')
    parser.add_argument('--no-parallel',dest='parallel',action='store_false')
    parser.set_defaults(parallel=True)

    args = parser.parse_args()

    metadata=run(path=args.file,
                 parameter=args.parameter,
                 post_pipeline_mode=args.post_pipeline_mode,
                 dry_run=args.dry_run,
                 load_default=args.load_default,
                 parallel=args.parallel,
                 playback=args.playback,
                 record=args.record)

    if not args.dry_run:
        sdprint.print_format(metadata.get_files(),args.format,args.print_only_one_item) # warning: load list in memory
コード例 #10
0
##################################

"""This module filters a columns list."""

import sys
import argparse
import json
import sdprint

def run(files, key_list_to_keep):
    """Project each file dict onto the requested columns.

    An empty *key_list_to_keep* means "keep every column" and returns
    the input list unchanged; otherwise a new list of reduced dicts is
    built.
    """
    if not key_list_to_keep:
        return files

    wanted = set(key_list_to_keep)
    return [{k: v for k, v in f.items() if k in wanted} for f in files]

if __name__ == '__main__':
    # stdin JSON -> column projection -> formatted stdout.
    parser = argparse.ArgumentParser()
    parser.add_argument('-C', '--column', type=lambda s: s.split(','), default=[],
                        help="set column(s) to keep")
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    args = parser.parse_args()

    records = run(json.load(sys.stdin), args.column)
    sdprint.print_format(records, args.format)
コード例 #11
0
    Example
        input
            name='variable'
            values=['tasmin','tasmax']
        output
            "variable=tasmin OR variable=tasmax"
    """
    l=[]
    for v in values:
        l.append("%s='%s'"%(name,v))

    if len(l)>0:
        buf=" OR ".join(l)
        return "(%s)"%buf
    else:
        return ""

# init.

if __name__ == '__main__':
    # Build search-API queries from a facets-groups file and print them.
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='-', help='Facets groups file')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    groups = sdpipelineutils.get_input_data(args.file)
    sdprint.print_format(run(groups), args.format, args.print_only_one_item)
コード例 #12
0
ファイル: sdcmpindexes.py プロジェクト: nedclimaterisk/synda
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

# Scratch directory where each index's dump is written.
output_dir = '/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('selection_file', nargs='?', default='-', help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    queries = sdpipeline.build_queries(path=args.selection_file)

    # Run the same queries against every known index host and record
    # each answer in its own file so the outputs can be compared later.
    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host])  # this is to have parallel, but on only one index
        metadata = sdrun.run(queries)
        metadata = sdpipeline.post_pipeline(metadata, 'generic')  # this is to exclude malformed files if any

        with open('%s/%s' % (output_dir, index_host), 'w') as fh:
            sdprint.print_format(metadata.get_files(), 'line', fh=fh)
コード例 #13
0
def transform_facets_for_dataset_attrs_retrieval(facets):
    """Return a copy of *facets* configured for dataset attrs retrieval.

    The caller's dict is never mutated: we deep-copy it, then force the
    search type to 'Dataset' and request every field.
    """
    dataset_facets = copy.deepcopy(facets)
    dataset_facets['type'] = ['Dataset']
    dataset_facets['fields'] = ['*']
    return dataset_facets


# init.

if __name__ == '__main__':
    # Facets-groups file in -> queries out.
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='-', help='Facets groups file')
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='raw')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    groups = sdpipelineutils.get_input_data(args.file)
    queries = run(groups)
    sdprint.print_format(queries, args.format, args.print_only_one_item)
コード例 #14
0
ファイル: sdquicksearch.py プロジェクト: Prodiguer/synda
    return mqr

def ws_call(query):
    """Execute one search-API query and return the processed response.

    Raises SDException when the response fills a whole chunk, since the
    call is unpaginated and a full chunk means results were truncated.
    """
    request = sdtypes.Request(url=query['url'], pagination=False)

    # return Response object
    response = sdnetutils.call_web_service(request.get_url(),
                                           timeout=sdconst.SEARCH_API_HTTP_TIMEOUT)

    if response.count() >= sdconst.SEARCH_API_CHUNKSIZE:
        raise SDException("SDQSEARC-002", "Number of returned files reach maximum limit")

    return sdaddap.run(response, query.get('attached_parameters', {}))

if __name__ == '__main__':
    prog = os.path.basename(__file__)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples of use\n%s""" % sdcliex.search(prog))

    parser.add_argument('parameter', nargs='*', default=[], help=sdi18n.m0001)
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='indent')
    parser.add_argument('-i', '--index_host')
    parser.add_argument('-m', '--post_pipeline_mode', default='file')
    parser.add_argument('-y', '--dry_run', action='store_true')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    # Run the search and print every matching file.
    result = run(parameter=args.parameter,
                 index_host=args.index_host,
                 post_pipeline_mode=args.post_pipeline_mode,
                 dry_run=args.dry_run)
    sdprint.print_format(result.get_files(), args.format, args.print_only_one_item)
コード例 #15
0
ファイル: sdcmpindexes.py プロジェクト: lukaszlacinski/synda
import sys
import os
import argparse
import json
import sdapp
import sdpipeline
import sdindex
import sdrun
import sdprint
import sdproxy_mt

# Scratch directory where each index's dump is written.
output_dir = '/tmp/sdcmpindexes'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='?', default='-', help='Selection file')
    args = parser.parse_args()

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    queries = sdpipeline.build_queries(path=args.file)

    # Query every index host in turn and dump each answer to its own
    # file so the outputs can be diffed afterwards.
    for index_host in sdindex.index_host_list:
        sdproxy_mt.set_index_hosts([index_host])  # this is to have parallel, but on only one index
        files = sdrun.run(queries)
        files = sdpipeline.post_pipeline(files, 'generic')  # this is to exclude malformed files if any

        with open('%s/%s' % (output_dir, index_host), 'w') as fh:
            sdprint.print_format(files, 'line', fh=fh)
コード例 #16
0
    return result


if __name__ == '__main__':
    prog = os.path.basename(__file__)
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples of use\n%s""" % sdcliex.search(prog))

    parser.add_argument('parameter', nargs='*', default=[], help=sdi18n.m0001)
    parser.add_argument('-F', '--format', choices=sdprint.formats, default='indent')
    parser.add_argument('-i', '--index_host')
    parser.add_argument('-m', '--post_pipeline_mode', default='file')
    parser.add_argument('-y', '--dry_run', action='store_true')
    parser.add_argument('-1', '--print_only_one_item', action='store_true')
    args = parser.parse_args()

    # Run the search, then print the matching files.
    search_result = run(parameter=args.parameter,
                        index_host=args.index_host,
                        post_pipeline_mode=args.post_pipeline_mode,
                        dry_run=args.dry_run)
    sdprint.print_format(search_result.get_files(), args.format,
                         args.print_only_one_item)