Python CentralProcessingUnit.Extract Examples

Programming Language: Python
Namespace/Package Name: processing.process
Method/Function: Extract
Examples at hotexamples.com: 1
Python CentralProcessingUnit.Extract - 1 examples found. These are the top rated real world Python examples of processing.process.CentralProcessingUnit.Extract extracted from open source projects. You can rate examples to help us improve the quality of examples.
Frequently Used Methods
Show Hide
CentralProcessingUnit(6)
Climatology(2)
Extract(1)
Example #1
Show file
def performExtraction(dataset,
                      mode,
                      stnfct,
                      dataargs,
                      loverwrite=False,
                      varlist=None,
                      lwrite=True,
                      lreturn=False,
                      ldebug=False,
                      lparallel=False,
                      pidstr='',
                      logger=None):
    ''' worker function to extract point data from gridded dataset '''
    # input checking
    if not isinstance(dataset, basestring): raise TypeError
    if not isinstance(dataargs, dict):
        raise TypeError  # all dataset arguments are kwargs
    if not callable(stnfct):
        raise TypeError  # function to load station dataset
    if lparallel:
        if not lwrite:
            raise IOError, 'In parallel mode we can only write to disk (i.e. lwrite = True).'
        if lreturn:
            raise IOError, 'Can not return datasets in parallel mode (i.e. lreturn = False).'

    # logging
    if logger is None:  # make new logger
        logger = logging.getLogger()  # new logger
        logger.addHandler(logging.StreamHandler())
    else:
        if isinstance(logger, basestring):
            logger = logging.getLogger(name=logger)  # connect to existing one
        elif not isinstance(logger, logging.Logger):
            raise TypeError, 'Expected logger ID/handle in logger KW; got {}'.format(
                str(logger))

    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)

    ## extract meta data from arguments
    dataargs, loadfct, srcage, datamsgstr = getMetaData(
        dataset, mode, dataargs)
    dataset_name = dataargs.dataset_name
    periodstr = dataargs.periodstr
    avgfolder = dataargs.avgfolder

    # load template dataset
    stndata = stnfct()  # load station dataset from function
    if not isinstance(stndata, Dataset): raise TypeError
    # N.B.: the loading function is necessary, because DataseNetCDF instances do not pickle well

    # get filename for target dataset and do some checks
    filename = getTargetFile(dataset=dataset,
                             mode=mode,
                             dataargs=dataargs,
                             lwrite=lwrite,
                             station=stndata.name)

    if ldebug: filename = 'test_' + filename
    if not os.path.exists(avgfolder):
        raise IOError, "Dataset folder '{:s}' does not exist!".format(
            avgfolder)
    lskip = False  # else just go ahead
    if lwrite:
        if lreturn:
            tmpfilename = filename  # no temporary file if dataset is passed on (can't rename the file while it is open!)
        else:
            if lparallel: tmppfx = 'tmp_exstns_{:s}_'.format(pidstr[1:-1])
            else: tmppfx = 'tmp_exstns_'.format(pidstr[1:-1])
            tmpfilename = tmppfx + filename
        filepath = avgfolder + filename
        tmpfilepath = avgfolder + tmpfilename
        if os.path.exists(filepath):
            if not loverwrite:
                age = datetime.fromtimestamp(os.path.getmtime(filepath))
                # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
                if age > srcage and os.path.getsize(filepath) > 1e5:
                    lskip = True
                # N.B.: NetCDF files smaller than 100kB are usually incomplete header fragments from a previous crashed

    # depending on last modification time of file or overwrite setting, start computation, or skip
    if lskip:
        # print message
        skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
            pidstr, filename, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.info(skipmsg)
    else:

        ## actually load datasets
        source = loadfct()  # load source
        # check period
        if 'period' in source.atts and dataargs.periodstr != source.atts.period:  # a NetCDF attribute
            raise DateError, "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(
                periodstr, source.atts.period)

        # print message
        if lclim:
            opmsgstr = "Extracting '{:s}'-type Point Data from Climatology ({:s})".format(
                stndata.name, periodstr)
        elif lts:
            opmsgstr = "Extracting '{:s}'-type Point Data from Time-series".format(
                stndata.name)
        else:
            raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
        # print feedback to logger
        logger.info(
            '\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'
            .format(pidstr, datamsgstr, opmsgstr))
        if not lparallel and ldebug: logger.info('\n' + str(source) + '\n')

        ## create new sink/target file
        # set attributes
        atts = source.atts.copy()
        atts[
            'period'] = dataargs.periodstr if dataargs.periodstr else 'time-series'
        atts['name'] = dataset_name
        atts['station'] = stndata.name
        atts['title'] = '{:s} (Stations) from {:s} {:s}'.format(
            stndata.title, dataset_name, mode.title())
        # make new dataset
        if lwrite:  # write to NetCDF file
            if os.path.exists(tmpfilepath):
                os.remove(tmpfilepath)  # remove old temp files
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[tmpfilename],
                                 atts=atts,
                                 mode='w')
        else:
            sink = Dataset(atts=atts)  # ony create dataset in memory

        # initialize processing
        CPU = CentralProcessingUnit(source,
                                    sink,
                                    varlist=varlist,
                                    tmp=False,
                                    feedback=ldebug)

        # extract data at station locations
        CPU.Extract(template=stndata, flush=True)
        # get results
        CPU.sync(flush=True)

        # print dataset
        if not lparallel and ldebug:
            logger.info('\n' + str(sink) + '\n')
        # write results to file
        if lwrite:
            sink.sync()
            writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                pidstr, filename, dataset_name)
            writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
            logger.info(writemsg)

            # rename file to proper name
            if not lreturn:
                sink.unload()
                sink.close()
                del sink  # destroy all references
                if os.path.exists(filepath):
                    os.remove(filepath)  # remove old file
                os.rename(tmpfilepath, filepath)
            # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed

        # clean up and return
        source.unload()
        del source  #, CPU
        if lreturn:
            return sink  # return dataset for further use (netcdf file still open!)
        else:
            return 0  # "exit code"