def buildWorkflow(self, request: TaskRequest, node: WorkflowNode, inputs: EDASDatasetCollection) -> EDASDatasetCollection:
    snode: SourceNode = node
    results = EDASDatasetCollection("InputKernel.build-" + node.name)
    t0 = time.time()
    dset = self.getCachedDataset(snode)
    if dset is not None:
        # Cache hit: reuse the previously opened dataset.
        self.importToDatasetCollection(results, request, snode, dset.xr)
        self.logger.info("Access input data from cache: " + dset.id)
    else:
        dataSource: DataSource = snode.varSource.dataSource
        if dataSource.type == SourceType.collection:
            # Aggregated collection: resolve the file list (optionally restricted to the
            # requested time domain) and open it as a chunked multi-file dataset.
            from edas.collection.agg import Axis as AggAxis, File as AggFile
            collection = Collection.new(dataSource.address)
            self.logger.info("Input collection: " + dataSource.address)
            aggs = collection.sortVarsByAgg(snode.varSource.vids)
            domain = request.operationManager.domains.getDomain(snode.domain)
            if domain is not None:
                timeBounds = domain.findAxisBounds(Axis.T)
                startDate = None if timeBounds is None else TimeConversions.parseDate(timeBounds.start)
                endDate = None if timeBounds is None else TimeConversions.parseDate(timeBounds.end)
            else:
                startDate = endDate = None
            for (aggId, vars) in aggs.items():
                use_chunks = True
                pathList = collection.pathList(aggId) if startDate is None else collection.periodPathList(aggId, startDate, endDate)
                assert len(pathList) > 0, f"No files found in aggregation {aggId} for date range {startDate} - {endDate}"
                nFiles = len(pathList)
                if use_chunks:
                    nReadPartitions = int(EdasEnv.get("mfdataset.npartitions", 250))
                    agg = collection.getAggregation(aggId)
                    nchunks, fileSize = agg.getChunkSize(nReadPartitions, nFiles)
                    chunk_kwargs = {} if nchunks is None else dict(chunks={"time": nchunks})
                    self.logger.info(f"Open mfdataset: vars={vars}, NFILES={nFiles}, FileSize={fileSize}, FILES[0]={pathList[0]}, chunk_kwargs={chunk_kwargs}, startDate={startDate}, endDate={endDate}, domain={domain}")
                else:
                    chunk_kwargs = {}
                    self.logger.info(f"Open mfdataset: vars={vars}, NFILES={nFiles}, FILES[0]={pathList[0]}")
                dset = xr.open_mfdataset(pathList, engine='netcdf4', data_vars=vars, parallel=True, **chunk_kwargs)
                self.logger.info("Import to collection")
                self.importToDatasetCollection(results, request, snode, dset)
                self.logger.info("Collection import complete.")
        elif dataSource.type == SourceType.file:
            # File source: the address is a glob pattern; open matching files in parallel when there is more than one.
            self.logger.info("Reading data from address: " + dataSource.address)
            files = glob.glob(dataSource.address)
            assert len(files) > 0, f"No files matching path {dataSource.address}"
            parallel = len(files) > 1
            dset = xr.open_mfdataset(dataSource.address, engine='netcdf4', data_vars=snode.varSource.ids, parallel=parallel)
            self.importToDatasetCollection(results, request, snode, dset)
        elif dataSource.type == SourceType.archive:
            # Archive source: resolve the archived file path from the request.
            self.logger.info("Reading data from archive: " + dataSource.address)
            dataPath = request.archivePath(dataSource.address)
            dset = xr.open_mfdataset([dataPath])
            self.importToDatasetCollection(results, request, snode, dset)
        elif dataSource.type == SourceType.dap:
            # OPeNDAP source: open a single remote dataset, chunked along the time axis.
            nchunks = request.runargs.get("ncores", 8)
            self.logger.info(f"Reading data from address: {dataSource.address}, nchunks = {nchunks}")
            # dset = xr.open_mfdataset([dataSource.address], engine="netcdf4", data_vars=snode.varSource.ids, chunks={"time": nchunks})
            dset = xr.open_dataset(dataSource.address, engine="netcdf4", chunks={"time": nchunks})
            self.importToDatasetCollection(results, request, snode, dset)
        self.logger.info(f"Access input data source {dataSource.address}, time = {time.time() - t0} sec")
        self.logger.info("@L: LOCATION=> host: {}, thread: {}, proc: {}".format(socket.gethostname(), threading.get_ident(), os.getpid()))
    return results
def getCapabilitiesXml(self, type: str) -> str:
    from edas.collection.agg import Collection
    if type is None: type = "kernels"
    self.logger.info(" GetCapabilities --> type: " + str(type))
    if type.lower().startswith("ker"):
        specs = [opMod.getCapabilitiesXml() for opMod in self.operation_modules.values()]
        return '<modules> {} </modules>'.format(" ".join(specs))
    elif type.lower().startswith("col"):
        specs = Collection.getCollectionsList()
        return '<collection> {} </collection>'.format(" ".join(specs))
    elif type.lower().startswith("var"):
        type_toks = type.split("|")
        collection = Collection.new(type_toks[1])
        return collection.getVariableSpec(type_toks[2])
    else:
        raise Exception("Unknown capabilities type: " + type)
def getVariableSpec(self, collId: str, varId: str) -> Dict:
    from edas.collection.agg import Collection
    col = Collection.new(collId)
    varSpec = col.getVariableSpec(varId)
    return Message("var", "VariableSpec", varSpec).dict()
import time, traceback
import xarray as xa
from dask.distributed import Client
from edas.collection.agg import Collection

print("STARTUP")
client = None
start = time.time()
collection = "cip_merra2_mth"
varName = 'KE'

try:
    client = Client('cldradn101:8786')
    print("READ " + collection)
    collection = Collection.new(collection)
    ds = xa.open_mfdataset(collection.pathList(varName), data_vars=['KE'], parallel=True)
    print("COMPUTE MEAN, Result:")
    lat_bnds, lon_bnds = [40, 43], [-96, -89]  # CONUS subregion bounds
    ds = ds.sel(lat=slice(*lat_bnds), lon=slice(*lon_bnds))  # subset to the region before averaging
    print(ds.KE.mean().values)
    print(" Completed computation in " + str(time.time() - start) + " seconds")
except Exception:
    traceback.print_exc()
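# A minimal sketch of the same regional-mean computation with the Dask client managed as a
# context manager, so the scheduler connection is closed even if the read fails, plus an
# explicit time-chunking hint. The scheduler address, collection id, variable, and region
# bounds are the same illustrative values used in the script above, not required settings.
import xarray as xa
from dask.distributed import Client
from edas.collection.agg import Collection

def regional_mean_ke(scheduler: str = 'cldradn101:8786', coll_id: str = "cip_merra2_mth") -> float:
    with Client(scheduler):
        coll = Collection.new(coll_id)
        ds = xa.open_mfdataset(coll.pathList('KE'), data_vars=['KE'], parallel=True, chunks={"time": 12})
        region = ds.sel(lat=slice(40, 43), lon=slice(-96, -89))
        return float(region.KE.mean().values)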