Example #1
def get_level(resource, level):
    from blackswan.ocgis_module import call
    from netCDF4 import Dataset
    from blackswan.utils import get_variable
    from numpy import squeeze
    from os import path

    try:
        if isinstance(resource, list):
            resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
            # resource.sort()

        level_data = call(resource, level_range=[int(level), int(level)])
        variable = get_variable(level_data)
        LOGGER.info('found %s in file' % variable)
        ds = Dataset(level_data, mode='a')
        var = ds.variables.pop(variable)
        dims = var.dimensions
        new_var = ds.createVariable('z%s' % level, var.dtype, dimensions=(dims[0], dims[2], dims[3]))
        # i = where(var[:]==level)
        new_var[:, :, :] = squeeze(var[:, 0, :, :])

        # TODO: possible error here: in case of an exception, the dataset will not be closed!
        # Exceptions arise, for example, for 20CRV2 data...
        try:
            new_var.setncatts({k: var.getncattr(k) for k in var.ncattrs()})
        except Exception:
            LOGGER.info('Could not set attributes for z%s' % level)
        ds.close()
        LOGGER.info('level %s extracted' % level)
        data = call(level_data, variable='z%s' % level)
    except Exception:
        LOGGER.exception('failed to extract level')
        data = None

    return data
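
A minimal usage sketch (the file name is hypothetical; assumes a 4D geopotential file and the blackswan stack installed):

# hypothetical usage: extract the 500 hPa level into a new 'z500' variable
z500_nc = get_level('zg_day_reanalysis.nc', level=500)
print(z500_nc)  # path to the netCDF file produced by call()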
Example #2
def spaghetti(resources, variable=None, title=None, file_extension='png'):
    """
    Creates a png file containing a spaghetti plot of the field mean of the values.

    :param resources: list of files containing the same variable
    :param variable: variable to be visualised. If None (default), variable will be detected
    :param title: string to be used as title

    :returns str: path to png file
    """
    """
    from blackswan.calculation import fieldmean

    try:
        fig = plt.figure(figsize=(20, 10), dpi=600, facecolor='w', edgecolor='k')
        LOGGER.debug('Start visualisation spaghetti plot')

        # === prepare environment
        if not isinstance(resources, list):
            resources = [resources]
        if variable is None:
            variable = utils.get_variable(resources[0])
        if title is None:
            title = "Field mean of %s " % variable

        LOGGER.info('plot values preparation done')
    except Exception:
        msg = "plot values preparation failed"
        LOGGER.exception(msg)
        raise Exception(msg)
    try:
        for c, nc in enumerate(resources):
            # get timestamps
            try:
                dt = utils.get_time(nc)  # [datetime.strptime(elem, '%Y-%m-%d') for elem in strDate[0]]
                ts = fieldmean(nc)
                plt.plot(dt, ts)
                # fig.line( dt,ts )
            except Exception:
                msg = "spaghetti plot failed for %s " % nc
                LOGGER.exception(msg)

        plt.title(title, fontsize=20)
        plt.grid()

        output_png = fig2plot(fig=fig, file_extension=file_extension)

        plt.close()
        LOGGER.info('timeseries spaghetti plot done for %s with %s lines.' % (variable, c + 1))
    except Exception:
        msg = 'matplotlib spaghetti plot failed'
        LOGGER.exception(msg)
        raise Exception(msg)
    return output_png
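
A minimal usage sketch (file names hypothetical; assumes the blackswan utilities and matplotlib are importable):

# hypothetical usage: one spaghetti plot over two model files
png_path = spaghetti(['tas_model_a.nc', 'tas_model_b.nc'],
                     variable='tas', title='Field mean of tas')
print(png_path)  # path to the generated png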
Example #3
    def _handler(self, request, response):

        if 'variable' in request.inputs:
            variable = request.inputs['variable'][0].data
        else:
            variable = get_variable(request.inputs['dataset'][0].file)
            if isinstance(variable, tuple):
                variable = variable[0]


        # Call simple_plot function
        output = simple_plot(
            resource=request.inputs['dataset'][0].file,
            variable=variable)
        LOGGER.info("produced output: %s", output)
        response.outputs['output'].file = output
        response.update_status("simple_plot done", 100)
        return response
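
For context, a minimal sketch of how such a handler is typically registered as a PyWPS 4 process; the class name and metadata are hypothetical, only the input/output identifiers mirror the handler above:

from pywps import Process, LiteralInput, ComplexInput, ComplexOutput, FORMATS

class SimplePlotProcess(Process):  # hypothetical wrapper for the _handler above
    def __init__(self):
        inputs = [
            ComplexInput('dataset', 'Dataset', supported_formats=[FORMATS.NETCDF]),
            # optional: the variable is detected from the file when omitted
            LiteralInput('variable', 'Variable name', data_type='string', min_occurs=0),
        ]
        outputs = [
            ComplexOutput('output', 'Plot', supported_formats=[FORMATS.TEXT], as_reference=True),
        ]
        super(SimplePlotProcess, self).__init__(
            self._handler,
            identifier='simple_plot',
            title='Simple Plot',
            inputs=inputs,
            outputs=outputs,
            status_supported=True)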
Example #4
def set_metadata_segetalflora(resource):
    """
  :param resources: imput files
  """
    # gather the set_metadata

    dic_segetalflora = {
        'keywords': 'Segetalflora',
        'tier': '2',
        'in_var': 'tas',
        'description': 'Number of European segetalflora species',
        'method': 'regression equation',
        'institution':
        'Julius Kuehn-Institut (JKI) Federal Research Centre for Cultivated Plants',
        'institution_url': 'www.jki.bund.de',
        'institute_id': "JKI",
        'contact_mail_3': '*****@*****.**',
        'version': '1.0',
    }

    dic_climatetype = {
        '1': 'cold northern species group',
        '2': 'warm northern species group',
        '3': 'moderate warm-toned species group',
        '4': 'moderate warm-toned to mediterranean species group',
        '5': 'mediterranean species group',
        '6': 'climate-indifferent species',
        '7': 'climate-undefinable species',
        'all': 'species of all climate types'
    }

    try:
        set_basic_md(resource)
    except Exception as e:
        LOGGER.error(e)

    try:
        set_dynamic_md(resource)
    except Exception as e:
        LOGGER.error(e)

    # set the segetalflora-specific metadata
    try:
        ds = Dataset(resource, mode='a')
        ds.setncatts(dic_segetalflora)
        ds.close()
    except Exception as e:
        LOGGER.error(e)
    # set the variable attributes:
    from blackswan.utils import get_variable

    try:
        ds = Dataset(resource, mode='a')
        var = get_variable(resource)
        if 'all' in var:
            climat_type = 'all'
        else:
            climat_type = var[-1]

        culture_type = var.strip('sf').strip(climat_type)

        sf = ds.variables[var]
        sf.setncattr('units', '1')  # dimensionless species count
        sf.setncattr('standard_name', 'sf%s%s' % (culture_type, climat_type))
        sf.setncattr(
            'long_name', 'Segetal flora %s land use for %s' %
            (culture_type, dic_climatetype['%s' % climat_type]))
        ds.close()
    except Exception as e:
        LOGGER.error('failed to set sf attributes %s ' % e)
    # sort the attributes:
    try:
        ds = Dataset(resource, mode='a')
        att = ds.ncattrs()
        att.sort()
        for a in att:
            entry = ds.getncattr(a)
            ds.setncattr(a, entry)
        history = '%s , Segetalflora Impact Model V1.0' % (ds.history)
        ds.setncattr('history', history)
        ds.close()
    except Exception as e:
        LOGGER.error('failed to sort attributes %s ' % e)

    return resource
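
A minimal usage sketch (the file name is hypothetical; the file must already contain a segetalflora variable such as 'sfall'):

# hypothetical usage: annotate a segetalflora result file in place
annotated_nc = set_metadata_segetalflora('sfall_EUR-11_2020.nc')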
Example #5
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        response.update_status('execution started at : {}'.format(dt.now()), 5)

        ################################
        # reading in the input arguments
        ################################
        try:
            LOGGER.info('read in the arguments')
            # resources = self.getInputValues(identifier='resources')
            season = request.inputs['season'][0].data
            LOGGER.info('season %s', season)

            period = request.inputs['period'][0].data
            LOGGER.info('period %s', period)
            anualcycle = request.inputs['anualcycle'][0].data

            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            LOGGER.debug('start: %s , end: %s ' % (start, end))

            resource = archiveextract(
                resource=rename_complexinputs(request.inputs['resource']))
            # resource = archiveextract(resource=[res.file for res in request.inputs['resource']])
            url_Rdat = request.inputs['Rdat'][0].data
            url_dat = request.inputs['dat'][0].data
            url_ref_file = request.inputs['netCDF'][0].data  # can be None
            # season = self.getInputValues(identifier='season')[0]
            # period = self.getInputValues(identifier='period')[0]
            # anualcycle = self.getInputValues(identifier='anualcycle')[0]
            LOGGER.info('period %s' % str(period))
            LOGGER.info('season %s' % str(season))
            LOGGER.info('read in the arguments')
            LOGGER.info('url_ref_file: %s' % url_ref_file)
            LOGGER.info('url_Rdat: %s' % url_Rdat)
            LOGGER.info('url_dat: %s' % url_dat)
        except Exception as e:
            msg = 'failed to convert arguments %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ############################
        # fetching training data
        ############################

        try:
            dat = abspath(download(url_dat))
            Rdat = abspath(download(url_Rdat))
            LOGGER.info('training data fetched')
        except Exception as e:
            LOGGER.error('failed to fetch training data %s' % e)

        ##########################################################
        # get the required bbox and time region from resource data
        ##########################################################
        # from flyingpigeon.weatherregimes import get_level
        try:
            from blackswan.ocgis_module import call
            from blackswan.utils import get_variable
            time_range = [start, end]

            variable = get_variable(resource)

            if url_ref_file:  # may be None or empty
                ref_file = download(url_ref_file)
                model_subset = call(
                    resource=resource,
                    variable=variable,
                    time_range=time_range,
                    # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                    regrid_destination=ref_file,
                    regrid_options='bil')
                LOGGER.info('Dataset subset with regridding done: %s ' %
                            model_subset)
            else:
                model_subset = call(
                    resource=resource,
                    variable=variable,
                    time_range=time_range,
                    # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                )
                LOGGER.info('Dataset time period extracted: %s ' %
                            model_subset)
        except Exception:
            LOGGER.exception('failed to make a data subset ')

        #######################
        # computing anomalies
        #######################
        try:
            cycst = anualcycle.split('-')[0]
            cycen = anualcycle.split('-')[1]
            reference = [
                dt.strptime(cycst, '%Y%m%d'),
                dt.strptime(cycen, '%Y%m%d')
            ]
            model_anomal = wr.get_anomalies(model_subset,
                                            reference=reference,
                                            sseas='multi')

            #####################
            # extracting season
            #####################

            model_season = wr.get_season(model_anomal, season=season)
        except Exception:
            LOGGER.exception('failed to compute annual cycle or seasons')

        #######################
        # call the R scripts
        #######################

        import shlex
        import subprocess
        from blackswan import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_projection.R'

            yr1 = start.year
            yr2 = end.year
            time = get_time(model_season)  # , format='%Y%m%d')

            # ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
            ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL'
            ]

            LOGGER.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            LOGGER.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            # , shell=True
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 90)
            else:
                LOGGER.error('No output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R failed %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        #################
        # set the outputs
        #################

        response.update_status('Set the process outputs ', 95)

        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_season
        response.outputs['output_frequency'].file = output_frec

        response.update_status('done', 100)
        return response
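
Note that the Popen/communicate pattern above never checks the R exit status. A sketch of the same call with subprocess.run (Python 3.5+), which raises on a non-zero exit code; args is the argument list built above:

import subprocess

# sketch: fail loudly when Rscript exits with a non-zero status
result = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        check=True)  # raises subprocess.CalledProcessError on failure
LOGGER.info('R outlog info:\n %s ', result.stdout)
LOGGER.debug('R outlog errors:\n %s ', result.stderr)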
Example #6
    def _handler(self, request, response):

        # LOGGER.debug('CURDIR XXXX : %s ' % (abspath(curdir)))
        # LOGGER.debug('WORKDIR XXXX : %s ' % (self.workdir))
        os.chdir(self.workdir)
        # LOGGER.debug('CURDIR XXXX : %s ' % (abspath(curdir)))

        init_process_logger('log.txt')
        # init_process_logger(os.path.join(self.workdir, 'log.txt'))

        # response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 6)

            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data
            timres = request.inputs['timeres'][0].data

            bboxDef = '-20,40,30,70'  # in general format

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            if (abs(float(bboxStr[0])) > 180 or
                    abs(float(bboxStr[1])) > 180 or
                    abs(float(bboxStr[2])) > 90 or
                    abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr))
                bboxStr = bboxStr.split(',')

            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            normalize = request.inputs['normalize'][0].data
            detrend = request.inputs['detrend'][0].data
            plot = request.inputs['plot'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 7)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            response.update_status('Preparing environment and converting arguments', 8)
            LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateEn))

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            seacyc = normalize != 'None'

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 9)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    if var == 'precip':
                        var = 'pr_wtr'
                    # conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    # conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: %s' % model)
        except Exception:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017:
        # - ALL the data will be downloaded, 1950 - 2017
        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=var, timres=timres, getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 10)

        LOGGER.debug("start and end time: %s - %s" % (start, end))
        time_range = [start, end]

        # Checking memory and dataset size
        model_size = get_files_size(model_nc)
        memory_avail = psutil.virtual_memory().available
        thrs = 0.3  # 30%
        ser_r = model_size >= thrs * memory_avail

        # ################################

        # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
        # so it makes sense to process it step-by-step
        # TODO: need to create a dictionary for such datasets (for models as well)
        # TODO: benchmark the method below for NCEP z500 for 60 years

#        if ('20CRV2' in model) and ('z' in var):
        if ('z' in var):
            tmp_total = []
            origvar = get_variable(model_nc)

            for z in model_nc:
                # tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)],
                          geom=bbox, spatial_wrapping='wrap',
                          prefix='levdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

            # Clean up the temporary per-file subsets
            for i in tmp_total:
                os.remove(i)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
        else:
            if ser_r:
                LOGGER.debug('Process reanalysis step-by-step')
                tmp_total = []
                for z in model_nc:
                    # tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z, variable=var, geom=bbox, spatial_wrapping='wrap',
                              prefix='Rdom_' + os.path.basename(z)[0:-3])
                    tmp_total.append(b0)
                tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
                model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range)
            else:
                LOGGER.debug('Using whole dataset at once')
                model_subset_tmp = call(resource=model_nc, variable=var,
                                        geom=bbox, spatial_wrapping='wrap', time_range=time_range,
                                        )

        # If the dataset is 20CRV2, the 6-hourly file should be converted to daily.
        # Option to use previously cached 6h data (if any) instead of downloading daily files.

        # Disabled for now
        # if '20CRV2' in model:
        #     if timres == '6h':
        #         from cdo import Cdo

        #         cdo = Cdo(env=os.environ)
        #         model_subset = '%s.nc' % uuid.uuid1()
        #         tmp_f = '%s.nc' % uuid.uuid1()

        #         cdo_op = getattr(cdo, 'daymean')
        #         cdo_op(input=model_subset_tmp, output=tmp_f)
        #         sti = '00:00:00'
        #         cdo_op = getattr(cdo, 'settime')
        #         cdo_op(sti, input=tmp_f, output=model_subset)
        #         LOGGER.debug('File Converted from: %s to daily' % (timres))
        #     else:
        #         model_subset = model_subset_tmp
        # else:
        #     model_subset = model_subset_tmp

        # Remove the assignment below when working with 6h data (re-enable the block above)
        model_subset = model_subset_tmp

        LOGGER.info('Dataset subset done: %s ', model_subset)

        response.update_status('dataset subsetted', 15)

        # BLOCK OF DETRENDING of model_subset !
        # The original model subset is kept for further visualisation if needed
        # There is currently an issue with SLP:
        # TODO 1 Keep the trend as a separate file
        # TODO 2 Think how to add options to plot anomalies AND original data...
        #        Maybe do archive and simulation = call(...) over NOT detrended data and keep it as well
        # TODO 3 Check a faster smoother and removing the trend of each grid cell

        if detrend == 'None':
            orig_model_subset = model_subset
        else:
            orig_model_subset = remove_mean_trend(model_subset, varname=var)

        # ======================================

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 20)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 30)
        start_time = time.time()  # measure data preparation ...

        try:
            # Construct descriptive filenames for the three files
            # listed in config file

            # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')

            # Fix < 1900 issue...
            refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + refEn.isoformat().strip().split("T")[0]
            simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + dateEn.isoformat().strip().split("T")[0]

            archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
            archive = call(resource=model_subset,
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)
            simulation = call(resource=model_subset, time_range=[dateSt, dateEn],
                              prefix=simNameString)
            LOGGER.info('archive and simulation files generated: %s, %s'
                        % (archive, simulation))
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                LOGGER.info('normalization function with method: %s '
                            % normalize)
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive,
                    simulation,
                    method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to generate normalization files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        output_file = 'output.txt'
        files = [os.path.abspath(archive), os.path.abspath(simulation), output_file]
        LOGGER.debug("Data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=model,
            sim_id=model,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
            period=[refSt.isoformat().strip().split("T")[0], refEn.isoformat().strip().split("T")[0]],
            bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
        response.update_status('generated config file', 40)
        #######################
        # CASTf90 call
        #######################
        start_time = time.time()  # measure call castf90

        # -----------------------
        try:
            import ctypes
            # TODO: This lib is for linux
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: Does the above work with default shell=False in subprocess... (?)
            os.environ['MKL_NUM_THREADS'] = str(nth)
            os.environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        # -----------------------

        # ##### TEMPORARY WORKAROUND! With hdf5-1.8.18 installed in anaconda ############
        # ##### MUST be removed after castf90 is recompiled with the latest hdf version
        # ##### NOT safe
        os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
        # hdflib = os.path.expanduser("~") + '/anaconda/lib'
        # hdflib = os.getenv("HOME") + '/anaconda/lib'
        import pwd
        hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib'
        os.environ['LD_LIBRARY_PATH'] = hdflib
        # ################################################################################

        response.update_status('Start CASTf90 call', 50)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = ['analogue.out', config_file]
            LOGGER.debug("castf90 command: %s", cmd)
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            LOGGER.info('analogue output:\n %s', output)
            response.update_status('**** CASTf90 succeeded', 60)
        except CalledProcessError as e:
            msg = 'CASTf90 failed:\n{0}'.format(e.output)
            LOGGER.exception(msg)
            raise Exception(msg)
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # TODO: Add try - except for pdfs
        if plot == 'Yes':
            analogs_pdf = analogs.plot_analogs(configfile=config_file)
        else:
            analogs_pdf = 'dummy_plot.pdf'
            with open(analogs_pdf, 'a'):
                os.utime(analogs_pdf, None)

        response.update_status('preparing output', 70)
        
        response.outputs['analog_pdf'].file = analogs_pdf
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        if seacyc is True:
            response.outputs['base_netcdf'].file = seasoncyc_base
            response.outputs['sim_netcdf'].file = seasoncyc_sim
        else:
            # TODO: Still unclear how to overpass unknown number of outputs
            dummy_base = 'dummy_base.nc'
            dummy_sim = 'dummy_sim.nc'
            with open(dummy_base, 'a'):
                os.utime(dummy_base, None)
            with open(dummy_sim, 'a'):
                os.utime(dummy_sim, None)
            response.outputs['base_netcdf'].file = dummy_base
            response.outputs['sim_netcdf'].file = dummy_sim

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        response.outputs['output_log'].file = 'log.txt'
        return response
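
The bounding-box validation above is repeated in several handlers. A sketch of a reusable helper applying the same default and the ocgis ordering (the helper name is hypothetical, not part of blackswan):

def parse_bbox(bbox_str, default='-20,40,30,70'):
    """Parse 'minlon,maxlon,minlat,maxlat' and fall back to the default
    when any coordinate is out of range (sketch only)."""
    vals = [float(x) for x in bbox_str.split(',')]
    if abs(vals[0]) > 180 or abs(vals[1]) > 180 or abs(vals[2]) > 90 or abs(vals[3]) > 90:
        vals = [float(x) for x in default.split(',')]
    # reorder to the ocgis convention used above: [minlon, minlat, maxlon, maxlat]
    return [vals[0], vals[2], vals[1], vals[3]]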
Example #7
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 6)

            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data

            # timres = request.inputs['timeres'][0].data
            timres = 'day'
            season = request.inputs['season'][0].data
            bboxDef = '-20,40,30,70'  # in general format

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            if (abs(float(bboxStr[0])) > 180 or
                    abs(float(bboxStr[1])) > 180 or
                    abs(float(bboxStr[2])) > 90 or
                    abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr))
                bboxStr = bboxStr.split(',')

            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            distance = request.inputs['dist'][0].data
            method = request.inputs['method'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 7)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################

        start = dateSt
        end = dateEn

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 9)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    if var == 'precip':
                        var = 'pr_wtr'
                    # conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    # conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: %s' % model)
        except Exception:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        # NOTE: If ref is say 1950 - 1990, and sim is just 1 week in 2017:
        # - ALL the data will be downloaded, 1950 - 2017
        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=var, timres=timres, getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 10)
        # from flyingpigeon.weatherregimes import get_level
        LOGGER.debug("start and end time: %s - %s" % (start, end))
        time_range = [start, end]

        # Checking memory and dataset size
        model_size = get_files_size(model_nc)
        memory_avail = psutil.virtual_memory().available
        thrs = 0.5  # 50%
        ser_r = model_size >= thrs * memory_avail

        # ################################

        # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 Gb,
        # so it makes sense to process it step-by-step
        # TODO: need to create a dictionary for such datasets (for models as well)
        # TODO: benchmark the method below for NCEP z500 for 60 years

#        if ('20CRV2' in model) and ('z' in var):
        if ('z' in var):
            tmp_total = []
            origvar = get_variable(model_nc)

            for z in model_nc:
                # tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)],
                          geom=bbox, spatial_wrapping='wrap',
                          prefix='levdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

            # Clean up the temporary per-file subsets
            for i in tmp_total:
                os.remove(i)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
        else:
            if ser_r:
                LOGGER.debug('Process reanalysis step-by-step')
                tmp_total = []
                for z in model_nc:
                    # tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z, variable=var, geom=bbox, spatial_wrapping='wrap',
                              prefix='Rdom_' + os.path.basename(z)[0:-3])
                    tmp_total.append(b0)
                tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
                model_subset_tmp = call(resource=tmp_total, variable=var, time_range=time_range)
            else:
                LOGGER.debug('Using whole dataset at once')
                model_subset_tmp = call(resource=model_nc, variable=var,
                                        geom=bbox, spatial_wrapping='wrap', time_range=time_range,
                                        )

        # If the dataset is 20CRV2, the 6-hourly file should be converted to daily.
        # Option to use previously cached 6h data (if any) instead of downloading daily files.

        if '20CRV2' in model:
            if timres == '6h':
                from cdo import Cdo

                cdo = Cdo(env=os.environ)
                model_subset = '%s.nc' % uuid.uuid1()
                tmp_f = '%s.nc' % uuid.uuid1()

                cdo_op = getattr(cdo, 'daymean')
                cdo_op(input=model_subset_tmp, output=tmp_f)
                sti = '00:00:00'
                cdo_op = getattr(cdo, 'settime')
                cdo_op(sti, input=tmp_f, output=model_subset)
                LOGGER.debug('File Converted from: %s to daily' % (timres))
            else:
                model_subset = model_subset_tmp
        else:
            model_subset = model_subset_tmp

        LOGGER.info('Dataset subset done: %s ', model_subset)

        response.update_status('dataset subsetted', 15)

        # ======================================

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 20)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 30)
        start_time = time.time()  # measure data preparation ...

        # -----------------------
        #  try:
        #     import ctypes
        #     # TODO: This lib is for linux
        #     mkl_rt = ctypes.CDLL('libmkl_rt.so')
        #     nth = mkl_rt.mkl_get_max_threads()
        #     LOGGER.debug('Current number of threads: %s' % (nth))
        #     mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        #     nth = mkl_rt.mkl_get_max_threads()
        #     LOGGER.debug('NEW number of threads: %s' % (nth))
        #     # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
        #     os.environ['MKL_NUM_THREADS'] = str(nth)
        #     os.environ['OMP_NUM_THREADS'] = str(nth)
        # except Exception as e:
        #     msg = 'Failed to set THREADS %s ' % e
        #     LOGGER.debug(msg)
        # -----------------------

        response.update_status('Start DIM calc', 50)

        # Calculation of Local Dimensions ==================
        LOGGER.debug('Calculation of the distances using: %s metric' % (distance))
        LOGGER.debug('Calculation of the dims with: %s' % (method))

        dim_filename = '%s.txt' % model
        tmp_dim_fn = '%s.txt' % uuid.uuid1()

        Rsrc = config.Rsrc_dir()

        if (method == 'Python'):
            try:
                l_dist, l_theta = localdims(resource=model_subset, variable=var, distance=str(distance))
                response.update_status('**** Dims with Python succeeded', 60)
            except Exception:
                LOGGER.exception('No output returned from Python call')

        if (method == 'Python_wrap'):
            try:
                l_dist, l_theta = localdims_par(resource=model_subset, variable=var, distance=str(distance))
                response.update_status('**** Dims with Python succeeded', 60)
            except Exception:
                LOGGER.exception('No output returned from Python call')

        if (method == 'R'):
            # from os.path import join
            Rfile = 'localdimension_persistence_fullD.R'
            args = ['Rscript', os.path.join(Rsrc, Rfile),
                    '%s' % model_subset, '%s' % var,
                    '%s' % tmp_dim_fn]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))

            try:
                output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
                LOGGER.info('R outlog info:\n %s ' % output)
                LOGGER.debug('R outlog errors:\n %s ' % error)
                if len(output) > 0:
                    response.update_status('**** Dims with R succeeded', 60)
                else:
                    LOGGER.error('No output returned from R call')
                # HERE READ DATA FROM TEXT FILES
                R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
                l_theta = R_resdim[:, 0]
                l_dist = R_resdim[:, 1]
            except Exception:
                msg = 'Dim calculation with R failed'
                LOGGER.exception(msg)
                raise Exception(msg)

        if (method == 'R_wrap'):
            # from os.path import join
            Rfile = 'localdimension_persistence_serrD.R'
            args = ['Rscript', os.path.join(Rsrc, Rfile),
                    '%s' % model_subset, '%s' % var,
                    '%s' % tmp_dim_fn]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))

            try:
                output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
                LOGGER.info('R outlog info:\n %s ' % output)
                LOGGER.debug('R outlog errors:\n %s ' % error)
                if len(output) > 0:
                    response.update_status('**** Dims with R_wrap succeeded', 60)
                else:
                    LOGGER.error('No output returned from R call')
                # HERE READ DATA FROM TEXT FILES
                R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
                l_theta = R_resdim[:, 0]
                l_dist = R_resdim[:, 1]
            except Exception:
                msg = 'Dim calculation with R_wrap failed'
                LOGGER.exception(msg)
                raise Exception(msg)

        try:
            res_times = get_time(model_subset)
        except Exception:
            LOGGER.debug('Not standard calendar')
            res_times = analogs.get_time_nc(model_subset)

        # plot 1
        ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta)
        #

        res_times = [t.isoformat().strip().split("T")[0].replace('-', '') for t in res_times]

        # concatenation of values
        concat_vals = column_stack([res_times, l_theta, l_dist])
        savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',')

        # output season
        try:
            seas = _TIMEREGIONS_[season]['month'] # [12, 1, 2]
            LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season)
            LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas)
        except Exception:
            LOGGER.info('No months in TIMEREGIONS, moving to months')
            try:
                seas = _MONTHS_[season]['month'] # [1] or [2] or ...
                LOGGER.info('Season to grep from MONTHS: %s ' % season)
                LOGGER.info('Season N to grep from MONTHS: %s ' % seas)
            except Exception:
                seas = list(range(1, 13))  # fall back to all months
        ind = []

        # TODO: change concat_vals[i][0][4:6] to dt_obj.month !!!
        for i in range(len(res_times)):
            if (int(concat_vals[i][0][4:6]) in seas[:]):
                ind.append(i)
        sf = column_stack([concat_vals[i] for i in ind]).T
        seas_dim_filename = season + '_' + dim_filename
        savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',')

        # -------------------------- plot with R ---------------
        R_plot_file = 'plot_csv.R'
        ld2_pdf = 'local_dims.pdf'
        ld2_seas_pdf = season + '_local_dims.pdf'

        args = ['Rscript', os.path.join(Rsrc, R_plot_file),
                '%s' % dim_filename,
                '%s' % ld2_pdf]
        try:
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.debug('R outlog errors:\n %s ' % error)
        except Exception:
            msg = 'Could not produce plot'
            LOGGER.exception(msg)
            # TODO: Here need produce empty pdf(s) to pass to output

        args = ['Rscript', os.path.join(Rsrc, R_plot_file),
                '%s' % seas_dim_filename,
                '%s' % ld2_seas_pdf]
        try:
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.debug('R outlog errors:\n %s ' % error)
        except Exception:
            msg = 'Could not produce plot'
            LOGGER.exception(msg)
            # TODO: Here need produce empty pdf(s) to pass to output
        # 
        # ====================================================

        response.update_status('preparing output', 80)
        response.outputs['ldist'].file = dim_filename
        response.outputs['ldist_seas'].file = seas_dim_filename
        response.outputs['ld_pdf'].file = ld_pdf
        response.outputs['ld2_pdf'].file = ld2_pdf
        response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
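
The TODO above suggests deriving the month from a datetime object instead of slicing characters 4:6. A sketch of the seasonal index built that way (res_times holds the 'YYYYMMDD' strings produced above):

from datetime import datetime

# sketch: select the rows whose month belongs to the requested season
ind = [i for i, ts in enumerate(res_times)
       if datetime.strptime(ts, '%Y%m%d').month in seas]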
Example #8
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 10)
            resource = archiveextract(
                resource=rename_complexinputs(request.inputs['resource']))
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data

            bboxDef = '-20,40,30,70'  # in general format
            # level = 500

            season = request.inputs['season'][0].data

            level = request.inputs['level'][0].data
            if (level == 500):
                dummylevel = 1000  # dummy workaround for cdo sellevel
            else:
                dummylevel = 500
            LOGGER.debug('LEVEL selected: %s hPa' % (level))

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1])) > 180
                    or abs(float(bboxStr[2])) > 90
                    or abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug(
                    'BBOX is out of the range, using default instead: %s ' %
                    (bboxStr))
                bboxStr = bboxStr.split(',')

            # for i in bboxStr: bbox.append(int(i))
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            distance = request.inputs['dist'][0].data
            method = request.inputs['method'][0].data

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 20)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:

            # not necessary once ocgis_module.py is fixed
            dateSt = dt.combine(dateSt, dt_time(12, 0))
            dateEn = dt.combine(dateEn, dt_time(12, 0))

            # Check if 360_day calendar:
            try:
                if type(resource) is not list:
                    resource = [resource]

                modcal, calunits = get_calendar(resource[0])
                if '360_day' in modcal:
                    if dateSt.day == 31:
                        dateSt = dateSt.replace(day=30)
                        LOGGER.debug('Date has been changed to: %s' %
                                     (dateSt))
                    if dateEn.day == 31:
                        dateEn = dateEn.replace(day=30)
                        LOGGER.debug('Date has been changed to: %s' %
                                     (dateEn))
            except Exception:
                LOGGER.debug('Could not detect calendar')

            start = dateSt
            end = dateEn
            time_range = [start, end]

            LOGGER.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in and convert the arguments', 30)

        ########################
        # input data preperation
        ########################

        # TODO: Check if files contain more than one dataset

        response.update_status('Start preparing input data', 40)
        start_time = time.time()  # measure data preparation ...
        try:
            # TODO: Add selection of the level, maybe below in call(..., level_range=[..., ...])

            if isinstance(resource, list):
                # resource.sort()
                resource = sorted(
                    resource, key=lambda i: path.splitext(path.basename(i))[0])
            else:
                resource = [resource]

            # ===============================================================
            # REMOVE resources which are out of interest from the list
            # (years > and < than requested for calculation)

            tmp_resource = []

            for nc_f in resource:
                s, e = get_timerange(nc_f)
                tmpSt = dt.strptime(s, '%Y%m%d')
                tmpEn = dt.strptime(e, '%Y%m%d')
                if (tmpSt <= end) and (tmpEn >= start):
                    tmp_resource.append(nc_f)
                    LOGGER.debug('Selected file: %s ' % nc_f)
            resource = tmp_resource

            # Try to fix memory issue... (ocgis call for files like 20-30 gb... )
            # IF 4D - select pressure level before domain cut
            #
            # resource properties
            ds = Dataset(resource[0])
            variable = get_variable(resource[0])
            var = ds.variables[variable]
            dims = list(var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown_model'

            LOGGER.debug('MODEL: %s ' % (model_id))

            lev_units = 'hPa'

            if (dimlen > 3):
                lev = ds.variables[dims[1]]
                # actually index [1] needs to be detected... assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if (lev_units == 'Pa'):
                    level = level * 100
                    dummylevel = dummylevel * 100
                    # TODO: OR check the NAME and units of vertical level and find 200 , 300, or 500 mbar in it
                    # Not just level = level * 100.

            # Get Levels
            from cdo import Cdo
            cdo = Cdo(env=environ)

            lev_res = []
            if (dimlen > 3):
                for res_fn in resource:
                    tmp_f = 'lev_' + path.basename(res_fn)
                    try:
                        tmp_f = call(resource=res_fn,
                                     variable=variable,
                                     spatial_wrapping='wrap',
                                     level_range=[int(level),
                                                  int(level)],
                                     prefix=tmp_f[0:-3])
                    except Exception:
                        comcdo = '%s,%s' % (level, dummylevel)
                        cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                    lev_res.append(tmp_f)
            else:
                lev_res = resource

            # ===============================================================
            # TODO: Before the domain cut, regrid to the selected grid (???) if not reanalyses.
            # ================================================================

            # Get domain
            regr_res = []
            for res_fn in lev_res:
                tmp_f = 'dom_' + path.basename(res_fn)
                comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
                try:
                    tmp_f = call(resource=res_fn,
                                 geom=bbox,
                                 spatial_wrapping='wrap',
                                 prefix=tmp_f[0:-3])
                except:
                    cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
                regr_res.append(tmp_f)

            # ============================
            # Block to collect final data
            if (dimlen > 3):
                res_tmp_tmp = get_level(regr_res, level=level)
                variable = 'z%s' % level
                res_tmp = call(resource=res_tmp_tmp,
                               variable=variable,
                               time_range=time_range)
            else:
                res_tmp = call(resource=regr_res,
                               time_range=time_range,
                               spatial_wrapping='wrap')
            #######################################################################################

        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)
        LOGGER.debug("data preparation took %s seconds.",
                     time.time() - start_time)

        # -----------------------
        # try:
        #     import ctypes
        #     # TODO: This lib is for linux
        #     mkl_rt = ctypes.CDLL('libmkl_rt.so')
        #     nth = mkl_rt.mkl_get_max_threads()
        #     LOGGER.debug('Current number of threads: %s' % (nth))
        #     mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        #     nth = mkl_rt.mkl_get_max_threads()
        #     LOGGER.debug('NEW number of threads: %s' % (nth))
        #     # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
        #     environ['MKL_NUM_THREADS'] = str(nth)
        #     environ['OMP_NUM_THREADS'] = str(nth)
        # except Exception as e:
        #     msg = 'Failed to set THREADS %s ' % e
        #     LOGGER.debug(msg)
        # -----------------------

        response.update_status('Start DIM calc', 50)

        # Calculation of Local Dimensions ==================
        LOGGER.debug('Calculation of the distances using: %s metric' %
                     (distance))
        LOGGER.debug('Calculation of the dims with: %s' % (method))

        dim_filename = '%s.txt' % model_id
        tmp_dim_fn = '%s.txt' % uuid.uuid1()
        Rsrc = config.Rsrc_dir()

        if (method == 'Python'):
            try:
                l_dist, l_theta = localdims(resource=res_tmp,
                                            variable=variable,
                                            distance=str(distance))
                response.update_status('**** Dims with Python succeeded', 60)
            except:
                LOGGER.exception('No output returned from the Python call')

        if (method == 'Python_wrap'):
            try:
                l_dist, l_theta = localdims_par(resource=res_tmp,
                                                variable=variable,
                                                distance=str(distance))
                response.update_status('**** Dims with Python_wrap succeeded', 60)
            except:
                LOGGER.exception('No output returned from the Python_wrap call')

        if (method == 'R'):
            # from os.path import join
            Rfile = 'localdimension_persistence_fullD.R'
            args = [
                'Rscript',
                path.join(Rsrc, Rfile),
                '%s' % res_tmp,
                '%s' % variable,
                '%s' % tmp_dim_fn
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))

            try:
                output, error = subprocess.Popen(
                    args, stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE).communicate()
                LOGGER.info('R stdout:\n %s ' % output)
                LOGGER.debug('R stderr:\n %s ' % error)
                if len(output) > 0:
                    response.update_status('**** Dims with R succeeded', 60)
                else:
                    LOGGER.error('No output returned from the R call')
                # HERE READ DATA FROM TEXT FILES
                R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
                l_theta = R_resdim[:, 0]
                l_dist = R_resdim[:, 1]
            except:
                msg = 'Dim calculation with R failed'
                LOGGER.exception(msg)
                raise Exception(msg)

        if (method == 'R_wrap'):
            # from os.path import join
            Rfile = 'localdimension_persistence_serrD.R'
            args = [
                'Rscript',
                path.join(Rsrc, Rfile),
                '%s' % res_tmp,
                '%s' % variable,
                '%s' % tmp_dim_fn
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))

            try:
                output, error = subprocess.Popen(
                    args, stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE).communicate()
                LOGGER.info('R stdout:\n %s ' % output)
                LOGGER.debug('R stderr:\n %s ' % error)
                if len(output) > 0:
                    response.update_status('**** Dims with R_wrap succeeded',
                                           60)
                else:
                    LOGGER.error('No output returned from the R call')
                # HERE READ DATA FROM TEXT FILES
                R_resdim = loadtxt(fname=tmp_dim_fn, delimiter=',')
                l_theta = R_resdim[:, 0]
                l_dist = R_resdim[:, 1]
            except:
                msg = 'Dim calculation with R_wrap failed'
                LOGGER.exception(msg)
                raise Exception(msg)

        try:
            res_times = get_time(res_tmp)
        except:
            LOGGER.debug('Not standard calendar')
            res_times = analogs.get_time_nc(res_tmp)

        # plot 1
        ld_pdf = analogs.pdf_from_ld(x=l_dist, y=l_theta)
        #

        res_times = [
            res_times[i].isoformat().strip().split("T")[0].replace('-', '')
            for i in range(len(res_times))
        ]

        # concatenation of values
        concat_vals = column_stack([res_times, l_theta, l_dist])
        savetxt(dim_filename, concat_vals, fmt='%s', delimiter=',')

        # output season
        try:
            seas = _TIMEREGIONS_[season]['month']  # [12, 1, 2]
            LOGGER.info('Season to grep from TIMEREGIONS: %s ' % season)
            LOGGER.info('Season N to grep from TIMEREGIONS: %s ' % seas)
        except:
            LOGGER.info('Season not found in TIMEREGIONS, falling back to MONTHS')
            try:
                seas = _MONTHS_[season]['month']  # [1] or [2] or ...
                LOGGER.info('Season to grep from MONTHS: %s ' % season)
                LOGGER.info('Season N to grep from MONTHS: %s ' % seas)
            except:
                seas = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        ind = []

        # TODO: change concat_vals[i][0][4:6] to dt_obj.month !!!
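        # concat_vals[i][0] is a 'YYYYMMDD' string, so [4:6] is the zero-padded month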
        for i in range(len(res_times)):
            if (int(concat_vals[i][0][4:6]) in seas[:]):
                ind.append(i)
        sf = column_stack([concat_vals[i] for i in ind]).T
        seas_dim_filename = season + '_' + dim_filename
        savetxt(seas_dim_filename, sf, fmt='%s', delimiter=',')

        # -------------------------- plot with R ---------------
        R_plot_file = 'plot_csv.R'
        ld2_pdf = 'local_dims.pdf'
        ld2_seas_pdf = season + '_local_dims.pdf'

        args = [
            'Rscript',
            path.join(Rsrc, R_plot_file),
            '%s' % dim_filename,
            '%s' % ld2_pdf
        ]
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R stdout:\n %s ' % output)
            LOGGER.debug('R stderr:\n %s ' % error)
        except:
            msg = 'Could not produce plot'
            LOGGER.exception(msg)
            # TODO: produce an empty pdf here to pass to the output

        args = [
            'Rscript',
            path.join(Rsrc, R_plot_file),
            '%s' % seas_dim_filename,
            '%s' % ld2_seas_pdf
        ]
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R stdout:\n %s ' % output)
            LOGGER.debug('R stderr:\n %s ' % error)
        except:
            msg = 'Could not produce plot'
            LOGGER.exception(msg)
            # TODO: produce empty pdf(s) here to pass to the output
        #

        # ====================================================
        response.update_status('preparing output', 80)

        response.outputs['ldist'].file = dim_filename
        response.outputs['ldist_seas'].file = seas_dim_filename
        response.outputs['ld_pdf'].file = ld_pdf
        response.outputs['ld2_pdf'].file = ld2_pdf
        response.outputs['ld2_seas_pdf'].file = ld2_seas_pdf

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
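
The handler above builds the same Rscript/Popen boilerplate four times (for the dims and for both plots). A consolidated helper could look like the sketch below; `run_rscript` is hypothetical and not part of blackswan.

import subprocess

def run_rscript(args, logger):
    """Run an Rscript command line and return its stdout (sketch only)."""
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = proc.communicate()
    logger.info('R stdout:\n %s', output)
    if error:
        logger.debug('R stderr:\n %s', error)
    if not output:
        raise RuntimeError('no output returned from R call: %s' % ' '.join(args))
    return output

# usage, mirroring the 'R' branch above:
# run_rscript(['Rscript', path.join(Rsrc, Rfile), '%s' % res_tmp,
#              '%s' % variable, '%s' % tmp_dim_fn], LOGGER)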
Example #9
def map_robustness(signal, high_agreement_mask, low_agreement_mask,
                   variable=None, cmap='seismic', title=None,
                   file_extension='png'):
    """
    generates a graphic for the output of the ensembleRobustness process for a lat/lon file.

    :param signal: netCDF file containing the signal difference over time
    :param high_agreement_mask: netCDF file with the mask of high model agreement
    :param low_agreement_mask: netCDF file with the mask of low model agreement
    :param variable: variable to be plotted; detected from the signal file if None
    :param cmap: default='seismic'
    :param title: default='Model agreement of signal'
    :returns str: path/to/file.png
    """
    from blackswan import utils
    from numpy import mean, ma

    if variable is None:
        variable = utils.get_variable(signal)

    try:
        var_signal = utils.get_values(signal)
        mask_l = utils.get_values(low_agreement_mask)
        mask_h = utils.get_values(high_agreement_mask)

        # mask_l = ma.masked_where(low < 0.5, low)
        # mask_h = ma.masked_where(high < 0.5, high)
        # mask_l[mask_l == 0] = np.nan
        # mask_h[mask_h == 0] = np.nan

        LOGGER.info('data loaded')

        lats, lons = utils.get_coordinates(signal, unrotate=True)

        if len(lats.shape) == 1:
            cyclic_var, cyclic_lons = add_cyclic_point(var_signal, coord=lons)
            mask_l, cyclic_lons = add_cyclic_point(mask_l, coord=lons)
            mask_h, cyclic_lons = add_cyclic_point(mask_h, coord=lons)

            lons = cyclic_lons.data
            var_signal = cyclic_var

        LOGGER.info('lat lon loaded')

        minval = round(np.nanmin(var_signal))
        maxval = round(np.nanmax(var_signal)+.5)

        LOGGER.info('prepared data for plotting')
    except:
        msg = 'failed to get data for plotting'
        LOGGER.exception(msg)
        raise Exception(msg)

    try:
        fig = plt.figure(facecolor='w', edgecolor='k')

        ax = plt.axes(projection=ccrs.Robinson(central_longitude=int(mean(lons))))
        norm = MidpointNormalize(midpoint=0)

        cs = plt.contourf(lons, lats, var_signal, 60, norm=norm, transform=ccrs.PlateCarree(),
                          cmap=cmap)  # note: 'interpolation' is an imshow kwarg, not valid for contourf

        cl = plt.contourf(lons, lats, mask_l, 1, transform=ccrs.PlateCarree(), colors='none', hatches=[None, '/'])
        ch = plt.contourf(lons, lats, mask_h, 1, transform=ccrs.PlateCarree(), colors='none', hatches=[None, '.'])
        # artists, labels = ch.legend_elements()
        # plt.legend(artists, labels, handleheight=2)
        # plt.clim(minval,maxval)
        ax.coastlines()
        ax.gridlines()
        # ax.set_global()

        if title is None:
            plt.title('%s with Agreement' % variable)
        else:
            plt.title(title)
        plt.colorbar(cs)

        plt.annotate('// = low model ensemble agreement', (0, 0), (0, -10),
                     xycoords='axes fraction', textcoords='offset points', va='top')
        plt.annotate('..  = high model ensemble agreement', (0, 0), (0, -20),
                     xycoords='axes fraction', textcoords='offset points', va='top')

        graphic = fig2plot(fig=fig, file_extension=file_extension)
        plt.close()

        LOGGER.info('Plot created and figure saved')
    except:
        msg = 'failed to plot graphic'
        LOGGER.exception(msg)
        raise Exception(msg)

    return graphic
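
map_robustness relies on MidpointNormalize, which is defined elsewhere in the module. A minimal sketch following the widely used matplotlib recipe (the real class in blackswan may differ):

import numpy as np
from matplotlib.colors import Normalize

class MidpointNormalize(Normalize):
    """Colormap normalization pinning `midpoint` (e.g. 0) to the centre of the cmap."""

    def __init__(self, vmin=None, vmax=None, midpoint=0, clip=False):
        self.midpoint = midpoint
        Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # piecewise-linear map of [vmin, midpoint, vmax] onto [0, 0.5, 1]
        x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
        return np.ma.masked_array(np.interp(value, x, y))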
Example #10
    def _handler(self, request, response):
        chdir(self.workdir)
        init_process_logger('log.txt')

        process_start_time = time.time()  # measure process execution time ...
        response.update_status('execution started at : %s ' % dt.now(), 5)

        start_time = time.time()  # measure init ...

        resource = archiveextract(
            resource=rename_complexinputs(request.inputs['resource']))

        # Filter resource:
        if type(resource) == list:
            resource = sorted(resource,
                              key=lambda i: path.splitext(path.basename(i))[0])
        else:
            resource = [resource]

        refSt = request.inputs['refSt'][0].data
        refEn = request.inputs['refEn'][0].data
        dateSt = request.inputs['dateSt'][0].data
        dateEn = request.inputs['dateEn'][0].data

        regrset = request.inputs['regrset'][0].data
        direction = request.inputs['direction'][0].data
        # Check if model has 360_day calendar:
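        # (in a 360_day calendar every month has 30 days, so day-31 dates must be
        #  clamped to day 30 before they are used to build time ranges)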

        try:
            modcal, calunits = get_calendar(resource[0])
            LOGGER.debug('CALENDAR: %s' % (modcal))
            if '360_day' in modcal:
                if direction == 're2mo':
                    if refSt.day == 31:
                        refSt = refSt.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (refSt))
                    if refEn.day == 31:
                        refEn = refEn.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (refEn))
                else:  # mo2re
                    if dateSt.day == 31:
                        dateSt = dateSt.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' %
                                     (dateSt))
                    if dateEn.day == 31:
                        dateEn = dateEn.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' %
                                     (dateEn))
        except:
            LOGGER.debug('Could not detect calendar')

        seasonwin = request.inputs['seasonwin'][0].data
        nanalog = request.inputs['nanalog'][0].data

        bboxDef = '-20,40,30,70'  # in general format

        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary
        if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1])) > 180
                or abs(float(bboxStr[2])) > 90 or abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug(
                'BBOX is out of the range, using default instead: %s ' %
                (bboxStr))
            bboxStr = bboxStr.split(',')

        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))

        normalize = request.inputs['normalize'][0].data
        plot = request.inputs['plot'][0].data
        distance = request.inputs['dist'][0].data
        outformat = request.inputs['outformat'][0].data
        timewin = request.inputs['timewin'][0].data

        model_var = request.inputs['reanalyses'][0].data
        model, var = model_var.split('_')

        try:
            if direction == 're2mo':
                anaSt = dt.combine(dateSt, dt_time(
                    0, 0))  # dt.strptime(dateSt[0], '%Y-%m-%d')
                anaEn = dt.combine(dateEn, dt_time(
                    0, 0))  # dt.strptime(dateEn[0], '%Y-%m-%d')
                refSt = dt.combine(refSt, dt_time(
                    12, 0))  # dt.strptime(refSt[0], '%Y-%m-%d')
                refEn = dt.combine(refEn, dt_time(
                    12, 0))  # dt.strptime(refEn[0], '%Y-%m-%d')
                r_time_range = [anaSt, anaEn]
                m_time_range = [refSt, refEn]
            elif direction == 'mo2re':
                anaSt = dt.combine(dateSt, dt_time(
                    12, 0))  # dt.strptime(refSt[0], '%Y-%m-%d')
                anaEn = dt.combine(dateEn, dt_time(
                    12, 0))  # dt.strptime(refEn[0], '%Y-%m-%d')
                refSt = dt.combine(refSt, dt_time(
                    0, 0))  # dt.strptime(dateSt[0], '%Y-%m-%d')
                refEn = dt.combine(refEn, dt_time(
                    0, 0))  # dt.strptime(dateEn[0], '%Y-%m-%d')
                r_time_range = [refSt, refEn]
                m_time_range = [anaSt, anaEn]
            else:
                LOGGER.error(
                    'failed to find time periods for comparison direction')
        except:
            msg = 'failed to put simulation and reference time in order'
            LOGGER.exception(msg)
            raise Exception(msg)

        if normalize == 'None':
            seacyc = False
        else:
            seacyc = True

        if outformat == 'ascii':
            outformat = '.txt'
        elif outformat == 'netCDF':
            outformat = '.nc'
        else:
            LOGGER.error('output format not valid')

        try:
            if model == 'NCEP':
                # getlevel = True
                getlevel = False
                if 'z' in var:
                    level = var.strip('z')
                    variable = 'hgt'
                    # conform_units_to='hPa'
                else:
                    variable = 'slp'
                    level = None
                    # conform_units_to='hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in var:
                    variable = 'hgt'
                    level = var.strip('z')
                    # conform_units_to=None
                else:
                    variable = 'prmsl'
                    level = None
                    # conform_units_to='hPa'
            else:
                LOGGER.error('Reanalyses model not known')
            LOGGER.info('environment set')
        except:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        # LOGGER.exception("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in the arguments', 10)

        #################
        # get input data
        #################
        # TODO: do not forget to select years

        start_time = time.time()  # measure get_input_data ...
        response.update_status('fetching input data', 20)
        try:
            if direction == 're2mo':
                nc_reanalyses = reanalyses(start=anaSt.year,
                                           end=anaEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)
            else:
                nc_reanalyses = reanalyses(start=refSt.year,
                                           end=refEn.year,
                                           variable=var,
                                           dataset=model,
                                           getlevel=getlevel)

            if type(nc_reanalyses) == list:
                nc_reanalyses = sorted(
                    nc_reanalyses,
                    key=lambda i: path.splitext(path.basename(i))[0])
            else:
                nc_reanalyses = [nc_reanalyses]

            # For 20CRV2 geopotential height, the daily dataset for 100 years is about 50 GB,
            # so it makes sense to process it step by step
            # TODO: need to create a dictionary for such datasets (for models as well)
            # TODO: benchmark the method below for NCEP z500 over 60 years, maybe use the same (!)
            # TODO: for now everything is regridded to the reanalysis

            # if ('20CRV2' in model) and ('z' in var):
            if ('z' in var):
                tmp_total = []
                origvar = get_variable(nc_reanalyses[0])

                for z in nc_reanalyses:
                    # tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=[int(level), int(level)],
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + path.basename(z)[0:-3])
                    tmp_total.append(b0)

                tmp_total = sorted(
                    tmp_total,
                    key=lambda i: path.splitext(path.basename(i))[0])
                inter_subset_tmp = call(resource=tmp_total,
                                        variable=origvar,
                                        time_range=r_time_range)

                # Create new variable
                ds = Dataset(inter_subset_tmp, mode='a')
                z_var = ds.variables.pop(origvar)
                dims = z_var.dimensions
                new_var = ds.createVariable('z%s' % level,
                                            z_var.dtype,
                                            dimensions=(dims[0], dims[2],
                                                        dims[3]))
                new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                nc_subset = call(inter_subset_tmp, variable='z%s' % level)
                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    system(tbr)
                # for i in inter_subset_tmp
                tbr = 'rm -f %s' % (inter_subset_tmp)
                system(tbr)
            else:
                # TODO: ADD HERE serial as well as in analogs reanalysis process!!
                nc_subset = call(
                    resource=nc_reanalyses,
                    variable=var,
                    geom=bbox,
                    spatial_wrapping='wrap',
                    time_range=r_time_range,
                )

            response.update_status('**** Input reanalyses data fetched', 30)
        except:
            msg = 'failed to fetch or subset input files'
            LOGGER.exception(msg)
            raise Exception(msg)

        ########################
        # input data preperation
        ########################
        response.update_status('Start preparing input data', 40)

        m_start = m_time_range[0]
        m_end = m_time_range[1]

        # ===============================================================
        # REMOVE resources from the list which are outside
        # the years requested for the calculation

        tmp_resource = []

        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if ((tmpSt <= m_end) and (tmpEn >= m_start)):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % (re))
        resource = tmp_resource

        start_time = time.time()  # measure data preparation ...
        # TODO: Check the calendars! model vs reanalyses.
        # TODO: Check the units! model vs reanalyses.
        try:
            m_total = []
            modvar = get_variable(resource)
            # resource properties
            ds = Dataset(resource[0])
            m_var = ds.variables[modvar]
            dims = list(m_var.dimensions)
            dimlen = len(dims)

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id))

            lev_units = 'hPa'

            if (dimlen > 3):
                lev = ds.variables[dims[1]]
                # TODO: the level-axis index actually needs to be detected; assuming zg(time, plev, lat, lon)
                lev_units = lev.units

                if (lev_units == 'Pa'):
                    m_level = str(int(level) * 100)
                else:
                    m_level = level
            else:
                m_level = None

            if level is None:
                level_range = None
            else:
                level_range = [int(m_level), int(m_level)]
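            # e.g. level='500' with a level axis in Pa gives m_level='50000' and
            # level_range=[50000, 50000], matching the file's plev values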

            ds.close()

            for z in resource:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                # TODO: Important! If there is only one file, select the time period from it first!

                # select level and regrid

                # \/\/ working version 19Feb2019
                # b0 = call(resource=z, variable=modvar, level_range=level_range,
                #         spatial_wrapping='wrap', cdover='system',
                #         regrid_destination=nc_reanalyses[0], regrid_options='bil', prefix=tmp_n)

                try:
                    b0 = call(resource=z,
                              variable=modvar,
                              level_range=level_range,
                              spatial_wrapping='wrap',
                              cdover='system',
                              regrid_destination=nc_subset,
                              regrid_options='bil',
                              prefix=tmp_n)
                except:
                    b0 = call(resource=z,
                              variable=modvar,
                              level_range=level_range,
                              spatial_wrapping='wrap',
                              cdover='system',
                              regrid_destination=nc_reanalyses[0],
                              regrid_options='bil',
                              prefix=tmp_n)

                # select domain (already selected in fact, if regridded to 'nc_subset')
                b01 = call(resource=b0,
                           geom=bbox,
                           spatial_wrapping='wrap',
                           prefix='levregr_' + path.basename(z)[0:-3])

                # TODO: REPLACE rm -f by os.remove() ! (see the clean_files sketch after Example #12)
                tbr = 'rm -f %s' % (b0)
                system(tbr)
                tbr = 'rm -f %s.nc' % (tmp_n)
                system(tbr)
                # get full resource
                m_total.append(b01)

            model_subset = call(m_total, time_range=m_time_range)

            for i in m_total:
                tbr = 'rm -f %s' % (i)
                system(tbr)

            if m_level is not None:
                # Create new variable in model set
                ds = Dataset(model_subset, mode='a')
                mod_var = ds.variables.pop(modvar)
                dims = mod_var.dimensions
                new_modvar = ds.createVariable('z%s' % level,
                                               mod_var.dtype,
                                               dimensions=(dims[0], dims[2],
                                                           dims[3]))
                new_modvar[:, :, :] = squeeze(mod_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                mod_subset = call(model_subset, variable='z%s' % level)
            else:
                mod_subset = model_subset

        except:
            msg = 'failed to subset simulation or reference data'
            LOGGER.exception(msg)
            raise Exception(msg)

# --------------------------------------------
        try:
            if direction == 'mo2re':
                simulation = mod_subset
                archive = nc_subset
                base_id = model
                sim_id = model_id
            elif direction == 're2mo':
                simulation = nc_subset
                archive = mod_subset
                base_id = model_id
                sim_id = model
            else:
                LOGGER.error('direction not valid: %s ' % direction)
        except:
            msg = 'failed to find comparison direction'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if level is not None:
                out_var = 'z%s' % level
            else:
                var_archive = get_variable(archive)
                var_simulation = get_variable(simulation)
                if var_archive != var_simulation:
                    rename_variable(archive,
                                    oldname=var_archive,
                                    newname=var_simulation)
                    out_var = var_simulation
                    LOGGER.info('varname %s in netCDF renamed to %s' %
                                (var_archive, var_simulation))
        except:
            msg = 'failed to rename variable in target files'
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except:
            msg = 'failed to prepare seasonal cycle reference files'
            LOGGER.exception(msg)
            raise Exception(msg)

        # ip, output = mkstemp(dir='.', suffix='.txt')
        # output_file = path.abspath(output)

        output_file = 'output.txt'

        ################################
        # Prepare names for config.txt #
        ################################

        # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
        # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')

        # Fix < 1900 issue...
        refDatesString = refSt.isoformat().strip().split(
            "T")[0] + "_" + refEn.isoformat().strip().split("T")[0]
        simDatesString = dateSt.isoformat().strip().split(
            "T")[0] + "_" + dateEn.isoformat().strip().split("T")[0]

        archiveNameString = "base_" + out_var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3]) + '.nc'
        simNameString = "sim_" + out_var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                        % (bbox[0], bbox[2], bbox[1], bbox[3]) + '.nc'

        move(archive, archiveNameString)
        move(simulation, simNameString)

        archive = archiveNameString
        simulation = simNameString

        files = [path.abspath(archive), path.abspath(simulation), output_file]

        ############################
        # generating the config file
        ############################

        response.update_status('writing config file', 50)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=base_id,
                sim_id=sim_id,
                timewin=timewin,
                # varname=var,
                varname=out_var,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                period=[
                    refSt.isoformat().strip().split("T")[0],
                    refEn.isoformat().strip().split("T")[0]
                ],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except:
            msg = 'failed to generate config file'
            LOGGER.exception(msg)
            raise Exception(msg)

        #######################
        # CASTf90 call
        #######################
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90

        response.update_status('Start CASTf90 call', 60)

        # -----------------------
        try:
            import ctypes
            # TODO: This lib is for linux
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
            environ['MKL_NUM_THREADS'] = str(nth)
            environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        # -----------------------

        # ##### TEMPORARY WORKAROUND! For hdf5-1.8.18 installed in anaconda ###############
        # ##### MUST be removed once castf90 is recompiled with the latest hdf5 version
        # ##### NOT safe
        environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
        # hdflib = os.path.expanduser("~") + '/anaconda/lib'
        # hdflib = os.getenv("HOME") + '/anaconda/lib'
        import pwd
        hdflib = pwd.getpwuid(getuid()).pw_dir + '/anaconda/lib'
        environ['LD_LIBRARY_PATH'] = hdflib
        # ################################################################################

        try:
            response.update_status('execution of CASTf90', 70)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out stdout:\n %s ' % output)
            LOGGER.debug('analogue.out stderr:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 80)
        except:
            msg = 'CASTf90 failed'
            LOGGER.exception(msg)
            raise Exception(msg)

        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # TODO: Add try - except for pdfs
        if plot == 'Yes':
            analogs_pdf = analogs.plot_analogs(configfile=config_file)
        else:
            analogs_pdf = 'dummy_plot.pdf'
            with open(analogs_pdf, 'a'):
                utime(analogs_pdf, None)

        response.update_status('preparing output', 90)

        # Stopper to keep twitcher results, for debug
        # dummy=dummy
        response.outputs['analog_pdf'].file = analogs_pdf
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        # response.update_status('reformatted analog file', 95)
        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 95)
        LOGGER.info('rendered pages: %s ', viewer_html)
        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        response.outputs['output_log'].file = 'log.txt'
        return response
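
The bbox parsing and range checking above is repeated near-verbatim in several handlers (see also Examples #12 and #13). A consolidated sketch; `parse_bbox` is hypothetical and not a blackswan function:

def parse_bbox(bbox_str, default='-20,40,30,70'):
    """Parse 'minlon,maxlon,minlat,maxlat' and return the
    [minlon, minlat, maxlon, maxlat] order used for the ocgis calls;
    fall back to `default` when a coordinate is out of range (sketch only)."""
    vals = [float(v) for v in bbox_str.split(',')]
    if (abs(vals[0]) > 180 or abs(vals[1]) > 180
            or abs(vals[2]) > 90 or abs(vals[3]) > 90):
        vals = [float(v) for v in default.split(',')]
    return [vals[0], vals[2], vals[1], vals[3]]

# usage: bbox = parse_bbox(request.inputs['BBox'][0].data)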
Example #11
def get_anomalies(nc_file, frac=0.2, reference=None, method='ocgis', sseas='serial', variable=None):
    """
    Anomalisation of data subsets for weather classification by subtracting a smoothed annual cycle

    :param nc_file: input netCDF file
    :param frac: Number between 0-1 for strength of smoothing
               (0 = close to the original data, 1 = flat line)
               default = 0.2
    :param reference: Period over which the annual cycle is calculated
    :param method: 'ocgis' (default) or 'cdo' for the annual-cycle calculation
    :param sseas: 'serial' (default) or any other value for multiprocessing smoothing
    :param variable: variable name; detected from the file if None

    :returns str: path to output netCDF file
    """
    from netCDF4 import Dataset
    from os import environ

    if variable is None:
        variable = get_variable(nc_file)
        # get_variable may return a list/tuple when the file holds several variables:
        if isinstance(variable, (list, tuple)):
            _ds = Dataset(nc_file)
            # works only if there is exactly one 3D variable
            for j in variable:
                if len(_ds.variables[j].dimensions) == 3:
                    _var = j
            variable = _var
            _ds.close()
    LOGGER.debug('3D Variable selected: %s' % (variable))

    try:
        if (method == 'cdo'):
            from cdo import Cdo
            from os import system

            ip2, nc_anual_cycle = mkstemp(dir='.', suffix='.nc')

            cdo = Cdo(env=environ)
            # ip, nc_anual_cycle_tmp = mkstemp(dir='.', suffix='.nc')
            # TODO: if reference is none, use utils.get_time for nc_file to set the ref range
            #       But will need to fix 360_day issue (use get_time_nc from analogs)

            # com = 'seldate'
            # comcdo = 'cdo %s,%s-%s-%s,%s-%s-%s %s %s' % (com, reference[0].year, reference[0].month, reference[0].day,
            #                                              reference[1].year, reference[1].month, reference[1].day,
            #                                              nc_file, nc_anual_cycle_tmp)
            # LOGGER.debug('CDO: %s' % (comcdo))
            # system(comcdo)

            # Substitute the cdo seldate step with an ocgis call: cdo keeps the
            # input precision, so the anomalies would come out as integers...
            calc = '%s=%s' % (variable, variable)
            nc_anual_cycle_tmp = call(nc_file, time_range=reference, variable=variable, calc=calc)
            nc_anual_cycle = cdo.ydaymean(input=nc_anual_cycle_tmp, output=nc_anual_cycle)
        else:
            calc = [{'func': 'mean', 'name': variable}]
            calc_grouping = ['day', 'month']
            nc_anual_cycle = call(nc_file,
                                  calc=calc,
                                  calc_grouping=calc_grouping,
                                  variable=variable,
                                  time_range=reference)
        LOGGER.info('annual cycle calculated: %s' % (nc_anual_cycle))

    except Exception as e:
        msg = 'failed to calculate annual cycle %s' % e
        LOGGER.error(msg)
        raise Exception(msg)

    try:
        # LOWESS smoothing of the annual cycle
        import statsmodels.api as sm
        from numpy import tile, empty, linspace
        from cdo import Cdo
        cdo = Cdo(env=environ)
        # variable = utils.get_variable(nc_file)
        ds = Dataset(nc_anual_cycle, mode='a')
        vals = ds.variables[variable]
        vals_sm = empty(vals.shape)
        ts = vals.shape[0]
        x = linspace(1, ts*3, num=ts*3, endpoint=True)

        if ('serial' not in sseas):
            # Multiprocessing =======================
            # -----------------------
            try:
                import ctypes
                # TODO: This lib is for linux
                mkl_rt = ctypes.CDLL('libmkl_rt.so')
                nth = mkl_rt.mkl_get_max_threads()
                LOGGER.debug('Current number of threads: %s' % (nth))
                mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
                nth = mkl_rt.mkl_get_max_threads()
                LOGGER.debug('NEW number of threads: %s' % (nth))
                # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
                environ['MKL_NUM_THREADS'] = str(nth)
                environ['OMP_NUM_THREADS'] = str(nth)
            except Exception as e:
                msg = 'Failed to set THREADS %s ' % e
                LOGGER.debug(msg)
            # -----------------------

            from multiprocessing import Pool
            pool = Pool()

            valex = [0.]
            valex = valex*vals.shape[1]*vals.shape[2]

            # TODO redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    valex[ind] = vals[:, lat, lon]
                    ind += 1

            LOGGER.debug('Start smoothing with multiprocessing')
            # TODO fraction option frac=... is not used here
            tmp_sm = pool.map(_smooth, valex)
            pool.close()
            pool.join()

            # TODO redo with reshape
            ind = 0
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    vals_sm[:, lat, lon] = tmp_sm[ind]
                    ind += 1
        else:
            # Serial ==================================
            vals_sm = empty(vals.shape)
            for lat in range(vals.shape[1]):
                for lon in range(vals.shape[2]):
                    try:
                        y = tile(vals[:, lat, lon], 3)
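                        # the series is tiled 3x and lowess is evaluated on the middle
                        # copy (ts:ts*2), so the smoothed cycle stays periodic across
                        # the year boundary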
                        # ys = smooth(y, window_size=91, order=2, deriv=0, rate=1)[ts:ts*2]
                        ys = sm.nonparametric.lowess(y, x, frac=frac)[ts:ts*2, 1]
                        vals_sm[:, lat, lon] = ys
                    except:
                        msg = 'failed for lat %s lon %s' % (lat, lon)
                        LOGGER.exception(msg)
                        raise Exception(msg)
                LOGGER.debug('done for %s - %s ' % (lat, lon))

        vals[:, :, :] = vals_sm[:, :, :]
        ds.close()
        LOGGER.info('smoothing of annual cycle done')
    except:
        msg = 'failed smoothing of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    try:
        ip, nc_anomal = mkstemp(dir='.', suffix='.nc')
        try:
            nc_anomal = cdo.sub(input=[nc_file, nc_anual_cycle], output=nc_anomal)
            LOGGER.info('cdo.sub; anomalisation done: %s ' % nc_anomal)
        except:
            # bug cdo: https://code.mpimet.mpg.de/boards/1/topics/3909
            ip3, nc_in1 = mkstemp(dir='.', suffix='.nc')
            ip4, nc_in2 = mkstemp(dir='.', suffix='.nc')
            ip5, nc_out = mkstemp(dir='.', suffix='.nc')
            nc_in1 = cdo.selvar(variable, input=nc_file, output=nc_in1)
            nc_in2 = cdo.selvar(variable, input=nc_anual_cycle, output=nc_in2)
            nc_out = cdo.sub(input=[nc_in1, nc_in2], output=nc_out)
            nc_anomal = nc_out
    except:
        msg = 'failed subtraction of annual cycle'
        LOGGER.exception(msg)
        raise Exception(msg)
    return nc_anomal
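
The multiprocessing branch above maps `_smooth` over all grid cells, but that helper is defined elsewhere in the module. A sketch consistent with the serial branch (per the TODO above, the frac option is not forwarded by pool.map, so a default is assumed here):

import statsmodels.api as sm
from numpy import linspace, tile

def _smooth(cell_ts, frac=0.2):
    """LOWESS-smooth one grid cell's time series (sketch mirroring the serial branch)."""
    ts = len(cell_ts)
    x = linspace(1, ts * 3, num=ts * 3, endpoint=True)
    y = tile(cell_ts, 3)  # tile 3x, smooth, keep the middle copy -> periodic result
    return sm.nonparametric.lowess(y, x, frac=frac)[ts:ts * 2, 1]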
Example #12
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        from datetime import datetime as dt
        from blackswan import weatherregimes as wr
        from tempfile import mkstemp

        response.update_status('execution started at : {}'.format(dt.now()), 5)

        ################################
        # reading in the input arguments
        ################################
        LOGGER.info('read in the arguments')
        # resources = self.getInputValues(identifier='resources')
        season = request.inputs['season'][0].data
        LOGGER.info('season %s', season)

        bboxDef = '-80,50,20,70'  # in general format

        bbox = []
        bboxStr = request.inputs['BBox'][0].data
        LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
        bboxStr = bboxStr.split(',')

        # Check for wrong coordinates and apply the default if necessary
        if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1])) > 180
                or abs(float(bboxStr[2])) > 90 or abs(float(bboxStr[3])) > 90):
            bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
            LOGGER.debug(
                'BBOX is out of the range, using default instead: %s ' %
                (bboxStr))
            bboxStr = bboxStr.split(',')

        bbox.append(float(bboxStr[0]))
        bbox.append(float(bboxStr[2]))
        bbox.append(float(bboxStr[1]))
        bbox.append(float(bboxStr[3]))
        LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
        LOGGER.debug('BBOX original: %s ' % (bboxStr))

        model_var = request.inputs['reanalyses'][0].data
        model, variable = model_var.split('_')

        period = request.inputs['period'][0].data
        LOGGER.info('period %s', period)
        anualcycle = request.inputs['anualcycle'][0].data
        kappa = request.inputs['kappa'][0].data
        LOGGER.info('kappa %s', kappa)

        method = request.inputs['method'][0].data
        LOGGER.info('Calc annual cycle with %s', method)

        sseas = request.inputs['sseas'][0].data
        LOGGER.info('Annual cycle calc with %s', sseas)

        start = dt.strptime(period.split('-')[0], '%Y%m%d')
        end = dt.strptime(period.split('-')[1], '%Y%m%d')
        LOGGER.debug('start: %s , end: %s ' % (start, end))

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        try:
            if model == 'NCEP':
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    # conform_units_to = 'hPa'
            elif '20CRV2' in model:
                getlevel = False
                if 'z' in variable:
                    level = variable.strip('z')
                    # conform_units_to = None
                else:
                    level = None
                    # conform_units_to = 'hPa'
            else:
                LOGGER.error('Reanalyses dataset not known')
            LOGGER.info('environment set for model: %s' % model)
        except:
            msg = 'failed to set environment'
            LOGGER.exception(msg)
            raise Exception(msg)

        ##########################################
        # fetch Data from original data archive
        ##########################################

        from blackswan.datafetch import reanalyses as rl
        from blackswan.utils import get_variable
        from os.path import basename, splitext
        from os import system
        from netCDF4 import Dataset
        from numpy import squeeze

        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=variable,
                          getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('fetching data done', 20)
        ############################################################
        # get the required bbox and time region from resource data
        ############################################################

        response.update_status('subsetting region of interest', 30)

        time_range = [start, end]

        ############################################################
        # Block of level and domain selection for geop huge dataset
        ############################################################

        LevMulti = False

        # ===========================================================================================
        # Temporarily use the step-by-step path for pressure too... for slow VM machines...
        if ('z' in variable) or ('p' in variable):
            tmp_total = []
            origvar = get_variable(model_nc)

            if ('z' in variable):
                level_range = [int(level), int(level)]
            else:
                level_range = None

            if not LevMulti:
                for z in model_nc:
                    b0 = call(resource=z,
                              variable=origvar,
                              level_range=level_range,
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='levdom_' + basename(z)[0:-3])
                    tmp_total.append(b0)
            else:
                # multiprocessing - no improvements yet, needs checking on a high-performance machine...
                # -----------------------
                try:
                    import ctypes
                    import os
                    # TODO: This lib is for linux
                    mkl_rt = ctypes.CDLL('libmkl_rt.so')
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('Current number of threads: %s' % (nth))
                    mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
                    nth = mkl_rt.mkl_get_max_threads()
                    LOGGER.debug('NEW number of threads: %s' % (nth))
                    # TODO: Does it \/\/\/ work with default shell=False in subprocess... (?)
                    os.environ['MKL_NUM_THREADS'] = str(nth)
                    os.environ['OMP_NUM_THREADS'] = str(nth)
                except Exception as e:
                    msg = 'Failed to set THREADS %s ' % e
                    LOGGER.debug(msg)
                # -----------------------

                from multiprocessing import Pool
                pool = Pool()
                # from multiprocessing.dummy import Pool as ThreadPool
                # pool = ThreadPool()
                tup_var = [origvar] * len(model_nc)
                tup_lev = [level] * len(model_nc)
                tup_bbox = [bbox] * len(model_nc)
                tup_args = zip(model_nc, tup_var, tup_lev, tup_bbox)

                tmp_total = pool.map(ocgis_call_wrap, tup_args)
                pool.close()
                pool.join()

            LOGGER.debug('Temporal subset files: %s' % (tmp_total))

            tmp_total = sorted(tmp_total,
                               key=lambda i: splitext(basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total,
                                    variable=origvar,
                                    time_range=time_range)

            # Clean
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                system(tbr)

            if ('z' in variable):
                # Create new variable for Z geop
                ds = Dataset(inter_subset_tmp, mode='a')
                z_var = ds.variables.pop(origvar)
                dims = z_var.dimensions
                new_var = ds.createVariable('z%s' % level,
                                            z_var.dtype,
                                            dimensions=(dims[0], dims[2],
                                                        dims[3]))
                new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
                # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
                ds.close()
                model_subset = call(inter_subset_tmp, variable='z%s' % level)
            else:
                model_subset = inter_subset_tmp
        else:
            model_subset = call(
                resource=model_nc,
                variable=variable,
                geom=bbox,
                spatial_wrapping='wrap',
                time_range=time_range,
                # conform_units_to=conform_units_to
            )
        # =============================================================================================
        LOGGER.info('Dataset subset done: %s ', model_subset)

        response.update_status('dataset subsetted', 40)
        ##############################################
        # computing anomalies
        ##############################################
        response.update_status('computing anomalies ', 50)

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        LOGGER.info('reference time: %s', reference)

        model_anomal = wr.get_anomalies(model_subset,
                                        reference=reference,
                                        method=method,
                                        sseas=sseas)  # , variable=variable)

        #####################
        # extracting season
        #####################
        response.update_status('normalizing data', 60)
        model_season = wr.get_season(model_anomal, season=season)

        response.update_status('anomalies computed and  normalized', 70)
        #######################
        # call the R scripts
        #######################
        response.update_status('Start weather regime clustering ', 80)
        import shlex
        import subprocess
        from blackswan import config
        from os.path import curdir, exists, join

        try:
            # rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_model.R'

            infile = model_season  # model_subset #model_ponderate
            # modelname = model
            # yr1 = start.year
            # yr2 = end.year
            ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % model_var,
                '%s' % kappa
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))
        except:
            msg = 'failed to build the R command'
            LOGGER.exception(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R stdout:\n %s ' % output)
            LOGGER.debug('R stderr:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 90)
            else:
                LOGGER.error('No output returned from the R call')
        except:
            msg = 'weatherregime in R failed'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('Weather regime clustering done ', 95)
        ############################################
        # set the outputs
        ############################################
        # response.update_status('Set the process outputs ', 96)

        response.outputs['Routput_graphic'].file = output_graphics
        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_subset
        response.update_status('done', 100)
        return response
Exemple #13
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 10)
            resource = archiveextract(resource=rename_complexinputs(request.inputs['resource']))
            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data
            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data

            bboxDef = '-20,40,30,70'  # default bbox as minlon,maxlon,minlat,maxlat
            # level = 500

            level = request.inputs['level'][0].data
            if (level == 500):
                dummylevel = 1000  # dummy workaround for cdo sellevel
            else:
                dummylevel = 500
            LOGGER.debug('LEVEL selected: %s hPa' % (level))

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            # (note: abs() must wrap the float, not the comparison)
            if (abs(float(bboxStr[0])) > 180 or
                    abs(float(bboxStr[1])) > 180 or
                    abs(float(bboxStr[2])) > 90 or
                    abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug('BBOX is out of the range, using default instead: %s ' % (bboxStr))
                bboxStr = bboxStr.split(',')

            # for i in bboxStr: bbox.append(int(i))
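            # reorder to the ocgis convention (minlon, minlat, maxlon, maxlat),
            # assuming the user input is (minlon, maxlon, minlat, maxlat)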
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            normalize = request.inputs['normalize'][0].data
            plot = request.inputs['plot'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data
            detrend = request.inputs['detrend'][0].data

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 20)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:

            # not necessary once ocgis_module.py is fixed
            refSt = dt.combine(refSt, dt_time(12, 0))
            refEn = dt.combine(refEn, dt_time(12, 0))
            dateSt = dt.combine(dateSt, dt_time(12, 0))
            dateEn = dt.combine(dateEn, dt_time(12, 0))

            # Check if 360_day calendar:
            try:
                if type(resource) is not list:
                    resource = [resource]

                modcal, calunits = get_calendar(resource[0])
                if '360_day' in modcal:
                    if refSt.day == 31:
                        refSt = refSt.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (refSt))
                    if refEn.day == 31:
                        refEn = refEn.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (refEn))
                    if dateSt.day == 31:
                        dateSt = dateSt.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (dateSt))
                    if dateEn.day == 31:
                        dateEn = dateEn.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (dateEn))
            except Exception:
                LOGGER.debug('Could not detect calendar')

            seacyc = normalize != 'None'

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            LOGGER.info('environment set')
        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("init took %s seconds.", time.time() - start_time)
        response.update_status('Read in and convert the arguments', 30)

        ########################
        # input data preparation
        ########################

        # TODO: check whether files contain more than one dataset

        response.update_status('Start preparing input data', 40)
        start_time = time.time()  # measure data preparation ...
        try:
            # TODO: add selection of the level, maybe below in call(..., level_range=[...,...])

            if type(resource) == list:
                # resource.sort()
                resource = sorted(resource, key=lambda i: path.splitext(path.basename(i))[0])
            else:
                resource = [resource]

            # ===============================================================
            # REMOVE resources which are outside the period of interest
            # (i.e. files whose years fall entirely before or after the requested range)

            tmp_resource = []

            for re in resource:
                s, e = get_timerange(re)
                tmpSt = dt.strptime(s, '%Y%m%d')
                tmpEn = dt.strptime(e, '%Y%m%d')
                if ((tmpSt <= end) and (tmpEn >= start)):
                    tmp_resource.append(re)
                    LOGGER.debug('Selected file: %s ' % (re))
            resource = tmp_resource

            # Try to fix memory issue... (ocgis call for files like 20-30 gb... )
            # IF 4D - select pressure level before domain cut
            #
            # resource properties
            ds = Dataset(resource[0])
            variable = get_variable(resource[0])
            var = ds.variables[variable]
            dims = list(var.dimensions)
            dimlen = len(dims)
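            # dimlen > 3 is taken to mean a 4D field, assumed to be ordered
            # as (time, level, lat, lon); such files need a level extraction
            # before the domain cut (see below)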

            try:
                model_id = ds.getncattr('model_id')
            except AttributeError:
                model_id = 'Unknown model'

            LOGGER.debug('MODEL: %s ' % (model_id))

            lev_units = 'hPa'

            if (dimlen > 3):
                lev = ds.variables[dims[1]]
                # index [1] should really be detected dynamically; here we assume zg(time, plev, lat, lon)
                lev_units = lev.units

                if (lev_units == 'Pa'):
                    level = level * 100
                    dummylevel = dummylevel * 100
                    # TODO: better to check the name and units of the vertical axis
                    # and locate 200, 300 or 500 mbar in it, not just level * 100

            # Get Levels
            from cdo import Cdo
            cdo = Cdo(env=environ)

            lev_res = []
            if (dimlen > 3):
                for res_fn in resource:
                    tmp_f = 'lev_' + path.basename(res_fn)
                    try:
                        tmp_f = call(resource=res_fn, variable=variable, spatial_wrapping='wrap',
                                     level_range=[int(level), int(level)], prefix=tmp_f[0:-3])
                    except Exception:
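                        # fall back to cdo sellevel when the ocgis level extraction fails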
                        comcdo = '%s,%s' % (level, dummylevel)
                        cdo.sellevel(comcdo, input=res_fn, output=tmp_f)
                    lev_res.append(tmp_f)
            else:
                lev_res = resource

            # ===============================================================
            # TODO: regrid to the selected grid before the domain cut (???) if not reanalyses data
            # ================================================================

            # Get domain
            regr_res = []
            for res_fn in lev_res:
                tmp_f = 'dom_' + path.basename(res_fn)
                comcdo = '%s,%s,%s,%s' % (bbox[0], bbox[2], bbox[1], bbox[3])
                try:
                    tmp_f = call(resource=res_fn, geom=bbox, spatial_wrapping='wrap', prefix=tmp_f[0:-3])
                except Exception:
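                    # fall back to cdo sellonlatbox when the ocgis domain subset fails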
                    cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
                regr_res.append(tmp_f)

            # ============================
            # Block to detrend data
            # TODO 1: keep the trend as a separate file
            # TODO 2: think how to add options to plot anomalies AND original data...
            #         maybe run archive and simulation = call(...) over the NOT detrended data and keep it as well
            if (dimlen > 3):
                res_tmp = get_level(regr_res, level=level)
                variable = 'z%s' % level
            else:
                res_tmp = call(resource=regr_res, spatial_wrapping='wrap')

            if detrend == 'None':
                orig_model_subset = res_tmp
            else:
                orig_model_subset = remove_mean_trend(res_tmp, varname=variable)

            # ============================

#            archive_tmp = call(resource=regr_res, time_range=[refSt, refEn], spatial_wrapping='wrap')
#            simulation_tmp = call(resource=regr_res, time_range=[dateSt, dateEn], spatial_wrapping='wrap')

            ################################
            # Prepare names for config.txt #
            ################################

            # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')

            # Use isoformat() instead of strftime(), which fails for years < 1900 in Python 2
            refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + refEn.isoformat().strip().split("T")[0]
            simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + dateEn.isoformat().strip().split("T")[0]

            archiveNameString = "base_" + variable + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + variable + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])

            archive = call(resource=res_tmp, time_range=[refSt, refEn], spatial_wrapping='wrap', prefix=archiveNameString)
            simulation = call(resource=res_tmp, time_range=[dateSt, dateEn], spatial_wrapping='wrap', prefix=simNameString)

            #######################################################################################

            if seacyc is True:
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(archive, simulation, method=normalize)
            else:
                seasoncyc_base = None
                seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)
        ip, output = mkstemp(dir='.', suffix='.txt')
        output_file = path.abspath(output)
        files = [path.abspath(archive), path.abspath(simulation), output_file]

        LOGGER.debug("data preperation took %s seconds.", time.time() - start_time)

        ############################
        # generating the config file
        ############################

        # TODO: add MODEL name as argument

        response.update_status('writing config file', 50)
        start_time = time.time()  # measure write config ...

        try:
            config_file = analogs.get_configfile(
                files=files,
                seasoncyc_base=seasoncyc_base,
                seasoncyc_sim=seasoncyc_sim,
                base_id=model_id,
                sim_id=model_id,
                timewin=timewin,
                varname=variable,
                seacyc=seacyc,
                cycsmooth=91,
                nanalog=nanalog,
                seasonwin=seasonwin,
                distfun=distance,
                outformat=outformat,
                calccor=True,
                silent=False,
                # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
                period=[refSt.isoformat().strip().split("T")[0], refEn.isoformat().strip().split("T")[0]],
                bbox="%s,%s,%s,%s" % (bbox[0], bbox[2], bbox[1], bbox[3]))
        except Exception as e:
            msg = 'failed to generate config file %s ' % e
            LOGGER.debug(msg)
            raise Exception(msg)

        LOGGER.debug("write_config took %s seconds.", time.time() - start_time)

        ##############
        # CASTf90 call
        ##############
        import subprocess
        import shlex

        start_time = time.time()  # measure call castf90
        response.update_status('Start CASTf90 call', 60)

        # -----------------------
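        # Tune Intel MKL threading via ctypes: request 64 threads, read back
        # the value MKL actually grants, and export it for OpenMP as well.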
        try:
            import ctypes
            # NOTE: libmkl_rt.so is Linux-specific
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: check whether this works with the default shell=False in subprocess (?)
            environ['MKL_NUM_THREADS'] = str(nth)
            environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        # -----------------------

        # ##### TEMPORARY WORKAROUND! With hdf5-1.8.18 installed in anaconda ############
        # ##### MUST be removed after castf90 is recompiled with the latest HDF5 version
        # ##### NOT safe
        environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
        # hdflib = os.path.expanduser("~") + '/anaconda/lib'
        # hdflib = os.getenv("HOME") + '/anaconda/lib'
        import pwd
        hdflib = pwd.getpwuid(getuid()).pw_dir + '/anaconda/lib'
        environ['LD_LIBRARY_PATH'] = hdflib
        # ################################################################################

        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = 'analogue.out %s' % path.relpath(config_file)
            # system(cmd)
            args = shlex.split(cmd)
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            LOGGER.info('analogue.out info:\n %s ' % output)
            LOGGER.debug('analogue.out errors:\n %s ' % error)
            response.update_status('**** CASTf90 succeeded', 70)
        except Exception as e:
            msg = 'CASTf90 failed %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # TODO: Add try - except for pdfs
        if plot == 'Yes':
            analogs_pdf = analogs.plot_analogs(configfile=config_file)
        else:
            analogs_pdf = 'dummy_plot.pdf'
            with open(analogs_pdf, 'a'):
                utime(analogs_pdf, None)  # touch an empty placeholder file

        response.update_status('preparing output', 80)

        response.outputs['analog_pdf'].file = analogs_pdf
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        if seacyc is True:
            response.outputs['base_netcdf'].file = seasoncyc_base
            response.outputs['sim_netcdf'].file = seasoncyc_sim
        else:
            # TODO: still unclear how to handle an unknown number of outputs
            dummy_base = 'dummy_base.nc'
            dummy_sim = 'dummy_sim.nc'
            with open(dummy_base, 'a'): utime(dummy_base, None)
            with open(dummy_sim, 'a'): utime(dummy_sim, None)
            response.outputs['base_netcdf'].file = dummy_base
            response.outputs['sim_netcdf'].file = dummy_sim

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 90)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 95)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
Exemple #14
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 7)

            refSt = request.inputs['refSt'][0].data
            # refEn = request.inputs['refEn'][0].data
            # end dates are forced to "today minus 3 days" (presumably the data
            # availability lag); zero-padded formatting is required here, since
            # '%s%s%s' would break for single-digit months or days
            refEn = dt.strptime(
                '%04d%02d%02d' % (dt.now().year, dt.now().month, dt.now().day),
                '%Y%m%d')
            refEn = refEn - timedelta(days=3)

            dateSt = request.inputs['dateSt'][0].data
            # dateEn = request.inputs['dateEn'][0].data
            dateEn = dt.strptime(
                '%04d%02d%02d' % (dt.now().year, dt.now().month, dt.now().day),
                '%Y%m%d')
            dateEn = dateEn - timedelta(days=3)

            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data

            bboxDef = '-80,50,20,70'  # default bbox as minlon,maxlon,minlat,maxlat
            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            # (note: abs() must wrap the float, not the comparison)
            if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1])) > 180
                    or abs(float(bboxStr[2])) > 90
                    or abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug(
                    'BBOX is out of the range, using default instead: %s ' %
                    (bboxStr))
                bboxStr = bboxStr.split(',')

            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            normalize = request.inputs['normalize'][0].data
            detrend = request.inputs['detrend'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 8)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            response.update_status('Preparing environment, converting arguments',
                                   9)
            LOGGER.debug('date: %s %s %s %s ' %
                         (type(refSt), refEn, dateSt, dateEn))

            start = min(refSt, dateSt)
            end = max(refEn, dateEn)

            seacyc = normalize != 'None'

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 10)

        # We work only with NCEP
        getlevel = False
        if 'z' in var:
            level = var.strip('z')
        else:
            level = None

        ##########################################
        # fetch Data from original data archive
        ##########################################

        try:
            model_nc = rl(start=start.year,
                          end=end.year,
                          dataset=model,
                          variable=var,
                          getlevel=getlevel)
            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 17)

        LOGGER.debug("start and end time: %s - %s" % (start, end))
        time_range = [start, end]

        # Checking memory and dataset size
        m_size = get_files_size(model_nc)
        memory_avail = psutil.virtual_memory().available
        thrs = 0.2  # 20%
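        # files will be processed one-by-one if the dataset would occupy
        # more than 20% of the available memory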

        ser_r = m_size >= thrs * memory_avail

        LOGGER.debug('Available Memory: %s ' % (memory_avail))
        LOGGER.debug('Dataset size: %s ' % (m_size))
        LOGGER.debug('Threshold: %s ' % (thrs * memory_avail))
        LOGGER.debug('Process files serially: %s ' % (ser_r))

        #        if ('20CRV2' in model) and ('z' in var):
        if ('z' in var):
            tmp_total = []
            origvar = get_variable(model_nc)

            for z in model_nc:
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                b0 = call(resource=z,
                          variable=origvar,
                          level_range=[int(level), int(level)],
                          geom=bbox,
                          spatial_wrapping='wrap',
                          prefix='levdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            tmp_total = sorted(
                tmp_total,
                key=lambda i: os.path.splitext(os.path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total,
                                    variable=origvar,
                                    time_range=time_range)

            # Clean
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)

            # Create new variable
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
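            # rebuild the variable without the level dimension and drop the
            # singleton level axis: (time, 1, lat, lon) -> (time, lat, lon)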
            new_var = ds.createVariable('z%s' % level,
                                        z_var.dtype,
                                        dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            # new_var.setncatts({k: z_var.getncattr(k) for k in z_var.ncattrs()})
            ds.close()
            model_subset_tmp = call(inter_subset_tmp, variable='z%s' % level)
        else:
            if ser_r:
                LOGGER.debug('Process reanalysis step-by-step')
                tmp_total = []
                for z in model_nc:
                    tmp_n = 'tmp_%s' % (uuid.uuid1())
                    b0 = call(resource=z,
                              variable=var,
                              geom=bbox,
                              spatial_wrapping='wrap',
                              prefix='Rdom_' + os.path.basename(z)[0:-3])
                    tmp_total.append(b0)
                tmp_total = sorted(
                    tmp_total,
                    key=lambda i: os.path.splitext(os.path.basename(i))[0])
                model_subset_tmp = call(resource=tmp_total,
                                        variable=var,
                                        time_range=time_range)

                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    os.system(tbr)
            else:
                LOGGER.debug('Using whole dataset at once')
                model_subset_tmp = call(
                    resource=model_nc,
                    variable=var,
                    geom=bbox,
                    spatial_wrapping='wrap',
                    time_range=time_range,
                )
        # leftover from the 20CRV special case...
        model_subset = model_subset_tmp

        LOGGER.info('Dataset subset done: %s ', model_subset)

        response.update_status('dataset subsetted', 19)

        # DETRENDING block for model_subset
        # The original model subset is kept for further visualisation if needed.
        # There is currently an issue with SLP:
        # TODO 1: keep the trend as a separate file
        # TODO 2: think how to add options to plot anomalies AND original data...
        #         maybe run archive and simulation = call(...) over the NOT detrended data and keep it as well
        # TODO 3: check with a faster smoother; add removal of the trend at each grid point

        if detrend == 'None':
            orig_model_subset = model_subset
        else:
            orig_model_subset = remove_mean_trend(model_subset, varname=var)

        # ======================================

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 20)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 22)
        start_time = time.time()  # measure data preparation ...

        try:
            # Construct descriptive filenames for the three files
            # listed in config file
            # TODO check strftime for years <1900 (!)

            refDatesString = dt.strftime(
                refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
            simDatesString = dt.strftime(
                dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')
            archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                                % (bbox[0], bbox[2], bbox[1], bbox[3])
            simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
            archive = call(resource=model_subset,
                           time_range=[refSt, refEn],
                           prefix=archiveNameString)
            simulation = call(resource=model_subset,
                              time_range=[dateSt, dateEn],
                              prefix=simNameString)
            LOGGER.info('archive and simulation files generated: %s, %s' %
                        (archive, simulation))
        except Exception as e:
            msg = 'failed to prepare archive and simulation files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            if seacyc is True:
                LOGGER.info('normalization function with method: %s ' %
                            normalize)
                seasoncyc_base, seasoncyc_sim = analogs.seacyc(
                    archive, simulation, method=normalize)
            else:
                seasoncyc_base = seasoncyc_sim = None
        except Exception as e:
            msg = 'failed to generate normalization files %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        output_file = 'output.txt'
        files = [
            os.path.abspath(archive),
            os.path.abspath(simulation), output_file
        ]
        LOGGER.debug("Data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=model,
            sim_id=model,
            timewin=timewin,
            varname=var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            period=[
                dt.strftime(refSt, '%Y-%m-%d'),
                dt.strftime(refEn, '%Y-%m-%d')
            ],
            bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
        response.update_status('generated config file', 25)
        #######################
        # CASTf90 call
        #######################
        start_time = time.time()  # measure call castf90

        #-----------------------
        try:
            import ctypes
            # NOTE: libmkl_rt.so is Linux-specific
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: check whether this works with the default shell=False in subprocess (?)
            os.environ['MKL_NUM_THREADS'] = str(nth)
            os.environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        #-----------------------

        # ##### TEMPORARY WORKAROUND! With hdf5-1.8.18 installed in anaconda ############
        # ##### MUST be removed after castf90 is recompiled with the latest HDF5 version
        # ##### NOT safe
        os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
        #hdflib = os.path.expanduser("~") + '/anaconda/lib'
        #hdflib = os.getenv("HOME") + '/anaconda/lib'
        import pwd
        hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib'
        os.environ['LD_LIBRARY_PATH'] = hdflib
        # ################################################################################

        response.update_status('Start CASTf90 call', 30)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = ['analogue.out', config_file]
            LOGGER.debug("castf90 command: %s", cmd)
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            LOGGER.info('analogue output:\n %s', output)
            response.update_status('**** CASTf90 succeeded', 40)
        except subprocess.CalledProcessError as e:
            msg = 'CASTf90 failed:\n{0}'.format(e.output)
            LOGGER.exception(msg)
            raise Exception(msg)
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # --------------- R cont analogs calcs -----------------------------------

        #######################
        # call the R scripts
        #######################
        response.update_status(
            'Start calculation of the stats and Return Periods ', 50)
        import shlex
        # import subprocess
        from blackswan import config
        from blackswan.visualisation import pdfmerge
        from os.path import curdir, exists, join

        try:
            #rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'analogs_diags-prox.R'
            Rdatfile = 'analogs_RT.Rdat'
            probs_c = 0.7
            probs_n = 0.3

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s' % output_file,
                '%s' % probs_c,
                '%s' % probs_n,
                '%s' % Rdatfile
            ]

            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))
        except Exception:
            msg = 'failed to build the R command'
            LOGGER.exception(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** Return Periods with R succeeded',
                                       60)
            else:
                LOGGER.error('no output returned from R call')
            analogs_pdf = pdfmerge(
                ['analogs_score-diags_new.pdf', 'analogs_RP-diags_new.pdf'])
        except Exception:
            msg = 'Return Periods calculation in R failed'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('Calculation of Return Periods done ', 70)

        # --------------- END of R cont analogs calcs ----------------------------

        response.update_status('preparing output', 75)

        response.outputs['analog_pdf'].file = analogs_pdf

        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file
        response.outputs['output_netcdf'].file = simulation
        response.outputs['target_netcdf'].file = archive

        if seacyc is True:
            response.outputs['base_netcdf'].file = seasoncyc_base
            response.outputs['sim_netcdf'].file = seasoncyc_sim
        else:
            # TODO: still unclear how to handle an unknown number of outputs
            dummy_base = 'dummy_base.nc'
            dummy_sim = 'dummy_sim.nc'
            with open(dummy_base, 'a'):
                os.utime(dummy_base, None)
            with open(dummy_sim, 'a'):
                os.utime(dummy_sim, None)
            response.outputs['base_netcdf'].file = dummy_base
            response.outputs['sim_netcdf'].file = dummy_sim

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response
Exemple #15
0
def localdims(resource, ap=0.5, variable=None, distance='euclidean'):
    """
    calculating of a local dimentions and persistence

    :param resource: str or list of str containing the netCDF file path

    :return 2 arrays: local dimentions and persistence
    """

    if variable is None:
        variable = get_variable(resource)

    data = get_values(resource, variable=variable)

    lat_index = get_index_lat(resource, variable=variable)
    lon_index = get_index_lon(resource, variable=variable)

    # TODO: should be 3D with TIME first.
    # Think how to operate on 4D data with unknown structure (lat,lon,level,time), for example.

    # dat=data.reshape((data.shape[0],data.shape[1]*data.shape[2]))
    dat = data.reshape(
        (data.shape[0], data.shape[lat_index] * data.shape[lon_index]))

    # Quantile definition
    quanti = 0.98

    # npoints=np.size(dat,0)
    # npoints=np.size(data,0)
    npoints = len(data[:, 0])

    dim = np.empty((npoints))
    dim.fill(np.nan)
    theta = np.empty((npoints))
    theta.fill(np.nan)

    # Calculation of total distance matrix

    dist = cdist(dat, dat, metric=distance)
    print(np.shape(dist))

    # alphap/betap = 0.5 matches the Python and Matlab conventions, 1 matches R
    abal = ap
    # abal=0.5

    for l in range(npoints):

        dist_l = dist[:, l]  # distances from state l (avoid shadowing the 'distance' argument)

        logdista = -np.log(dist_l)
        x = logdista[~np.isinf(logdista)]
        logdista = x[~np.isnan(x)]
        thresh = mquantiles(logdista, quanti, alphap=abal, betap=abal)
        # NOT ORIGINAL
        logdista = -np.log(dist_l)
        Li = [
            i for i in range(len(logdista))
            if (logdista[i] > thresh) and (logdista[i] < 0)
        ]
        Ti = np.diff(Li)
        N = len(Ti)
        q = 1. - quanti
        Si = Ti - 1
        Ncc = [i for i in range(len(Si)) if (Si[i] > 0)]
        Nc = len(Ncc)

        # Süveges maximum-likelihood estimator of the extremal index
        # (returned here as the 'persistence' measure)
        theta[l] = (sum(q * Si) + N + Nc - np.sqrt(
            (sum(q * Si) + N + Nc) ** 2 - 8 * Nc * sum(q * Si))) / (2 * sum(q * Si))
        logdista = np.sort(logdista)

        findidx = [
            i for i in range(len(logdista))
            if (logdista[i] > thresh) and (logdista[i] < 0)
        ]

        logextr = logdista[findidx[0]:len(logdista) - 1]
        # logextr=logdista[findidx[[1]]:(length(logdista)-1)]

        dim[l] = 1 / np.mean(logextr - thresh)

    return dim, theta
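
# A minimal usage sketch (hedged): the file name is hypothetical and a 3D
# (time, lat, lon) netCDF field is assumed, e.g.:
#
#     dim, theta = localdims('slp.2000.nc', ap=0.5, distance='euclidean')
#     # dim[t]   -> local dimension of the field at time step t
#     # theta[t] -> extremal index at time step t (the 'persistence' output)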
Exemple #16
0
def uncertainty(resouces, variable=None, ylim=None, title=None, file_extension='png', window=None):
    """
    creates a png file containing the appropriate uncertainty plot.

    :param resouces: list of files containing the same variable
    :param variable: variable to be visualised. If None (default), variable will be detected
    :param title: string to be used as title
    :param window: window size of the rolling mean

    :returns str: path/to/file.png
    """
    LOGGER.debug('Start visualisation uncertainty plot')

    import pandas as pd
    import numpy as np
    from os.path import basename
    from blackswan.utils import get_time, sort_by_filename
    from blackswan.calculation import fieldmean
    from blackswan.metadata import get_frequency

    # === prepare environment
    if type(resouces) == str:
        resouces = list([resouces])
    if variable is None:
        variable = utils.get_variable(resouces[0])
    if title is None:
        title = "Field mean of %s " % variable

    try:
        fig = plt.figure(figsize=(20, 10), facecolor='w', edgecolor='k')  # dpi=600,
        #  variable = utils.get_variable(resouces[0])
        df = pd.DataFrame()

        LOGGER.info('variable %s found in resources.' % variable)
        datasets = sort_by_filename(resouces, historical_concatination=True)

        for key in datasets.keys():
            try:
                data = fieldmean(datasets[key])  # get_values(f)
                ts = get_time(datasets[key])
                ds = pd.Series(data=data, index=ts, name=key)
                # ds_yr = ds.resample('12M', ).mean()   # yearly mean loffset='6M'
                df[key] = ds

            except Exception:
                LOGGER.exception('failed to calculate timeseries for %s ' % (key))

        frq = get_frequency(resouces[0])
        LOGGER.debug('frequency: %s', frq)

        if window is None:
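            # default window sizes (in time steps) correspond to roughly
            # 30 years per sampling frequency ('man' presumably means monthly)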
            if frq == 'day':
                window = 10951
            elif frq == 'man':
                window = 359
            elif frq == 'sem':
                window = 119
            elif frq == 'yr':
                window = 30
            else:
                LOGGER.debug('frequency %s is not included' % frq)
                window = 30

        if len(df.index.values) >= window * 2:
            # TODO: calculate the window size according to timestamps (day, mon, yr ... with get_frequency)
            df_smooth = df.rolling(window=window, center=True).mean()
            LOGGER.info('rolling mean calculated for all input data')
        else:
            df_smooth = df
            LOGGER.debug('timeseries too short for moving mean')
            fig.text(0.95, 0.05, '!!! timeseries too short for moving mean over 30 years !!!',
                     fontsize=20, color='red',
                     ha='right', va='bottom', alpha=0.5)

        try:
            rmean = df_smooth.quantile([0.5], axis=1,)  # df_smooth.median(axis=1)
            # skipna=False  quantile([0.5], axis=1, numeric_only=False )
            q10 = df_smooth.quantile([0.10], axis=1,)  # numeric_only=False)
            q33 = df_smooth.quantile([0.33], axis=1,)  # numeric_only=False)
            q66 = df_smooth.quantile([0.66], axis=1, )  # numeric_only=False)
            q90 = df_smooth.quantile([0.90], axis=1, )  # numeric_only=False)
            LOGGER.info('quantile calculated for all input data')
        except Exception:
            LOGGER.exception('failed to calculate quantiles')

        try:
            plt.fill_between(df_smooth.index.values, np.squeeze(q10.values), np.squeeze(q90.values),
                             alpha=0.5, color='grey')
            plt.fill_between(df_smooth.index.values, np.squeeze(q33.values), np.squeeze(q66.values),
                             alpha=0.5, color='grey')

            plt.plot(df_smooth.index.values, np.squeeze(rmean.values), c='r', lw=3)

            plt.xlim(min(df.index.values), max(df.index.values))
            plt.ylim(ylim)
            plt.title(title, fontsize=20)
            plt.grid()  # .grid_line_alpha=0.3

            output_png = fig2plot(fig=fig, file_extension=file_extension)
            plt.close()
            LOGGER.debug('timeseries uncertainty plot done for %s' % variable)
        except Exception as err:
            raise Exception('failed to plot the uncertainty figure. %s' % err)
    except Exception:
        LOGGER.exception('uncertainty plot failed for %s.' % variable)
        _, output_png = mkstemp(dir='.', suffix='.png')
    return output_png
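
# A minimal usage sketch (hedged): the file names are hypothetical; any list
# of netCDF files sharing one variable should work, e.g.:
#
#     png = uncertainty(['tas_model1.nc', 'tas_model2.nc'], title='tas ensemble')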
Exemple #17
0
def localdims_par(resource, ap=0.5, variable=None, distance='euclidean'):
    """
    calculating of a local dimentions and persistence

    :param resource: str or list of str containing the netCDF file path

    :return 2 arrays: local dimentions and persistence
    """

    # ===================================================
    # only for linux
    try:
        mkl_rt = ctypes.CDLL('libmkl_rt.so')
        nth = mkl_rt.mkl_get_max_threads()
        mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
        nth = mkl_rt.mkl_get_max_threads()
        environ['MKL_NUM_THREADS'] = str(nth)
        environ['OMP_NUM_THREADS'] = str(nth)
    except Exception:
        pass
    # ================================================================

    if variable is None:
        variable = get_variable(resource)

    data = get_values(resource, variable=variable)

    lat_index = get_index_lat(resource, variable=variable)
    lon_index = get_index_lon(resource, variable=variable)

    # TODO: should be 3D with TIME first.
    # Think how to operate on 4D data with unknown structure (lat,lon,level,time), for example.

    # dat=data.reshape((data.shape[0],data.shape[1]*data.shape[2]))
    global glob_dat
    glob_dat = data.reshape(
        (data.shape[0], data.shape[lat_index] * data.shape[lon_index]))

    # Quantile definition
    global glob_quanti
    glob_quanti = 0.98

    # npoints=np.size(dat,0)
    # npoints=np.size(data,0)
    npoints = len(data[:, 0])

    dim = np.empty((npoints))
    dim.fill(np.nan)
    theta = np.empty((npoints))
    theta.fill(np.nan)

    global glob_abal
    glob_abal = ap
    global glob_distance
    glob_distance = distance
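    # The module-level globals above are a workaround so that the pool
    # worker (_calc_dist) can see the data after fork(); this relies on
    # the POSIX 'fork' start method and will not work with 'spawn'.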

    # split the global data into a per-timestep list so pool.map can distribute it
    l_dat = [glob_dat[i, :].reshape(1, -1) for i in range(npoints)]

    # multi CPU
    pool = Pool()
    res_p = pool.map(_calc_dist, l_dat)
    pool.close()
    pool.join()

    # collect results
    for i, (d, t) in enumerate(res_p):
        dim[i] = d
        theta[i] = t

    return dim, theta
Exemple #18
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')

        response.update_status('execution started at : %s ' % dt.now(), 10)

        ################################
        # reading in the input arguments
        ################################
        try:
            response.update_status('reading input arguments', 20)

            LOGGER.info('read in the arguments')
            resource = archiveextract(
                resource=[res.file for res in request.inputs['resource']])

            # Files from different datasets, e.g. ...output1/slp.1999.nc and
            # ...output2/slp.1997.nc, are not ordered correctly by a plain .sort(),
            # so sort by basename instead:
            if type(resource) == list:
                resource = sorted(
                    resource, key=lambda i: path.splitext(path.basename(i))[0])
            else:
                resource = [resource]

            season = request.inputs['season'][0].data
            LOGGER.info('season %s', season)

            bboxDef = '-80,50,20,70'  # default bbox as minlon,maxlon,minlat,maxlat

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for wrong coordinates and apply the default if necessary
            # (note: abs() must wrap the float, not the comparison)
            if (abs(float(bboxStr[0])) > 180 or abs(float(bboxStr[1])) > 180
                    or abs(float(bboxStr[2])) > 90
                    or abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug(
                    'BBOX is out of the range, using default instead: %s ' %
                    (bboxStr))
                bboxStr = bboxStr.split(',')

            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            period = request.inputs['period'][0].data
            LOGGER.info('period %s', period)
            anualcycle = request.inputs['anualcycle'][0].data
            kappa = request.inputs['kappa'][0].data
            LOGGER.info('kappa %s', kappa)

            method = request.inputs['method'][0].data
            LOGGER.info('Calc annual cycle with %s', method)

            sseas = request.inputs['sseas'][0].data
            LOGGER.info('Annual cycle calculation with sseas: %s', sseas)

            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')

            # OCGIS workaround for models - to catch 31 Dec
            start = dt.combine(start, dt_time(12, 0))
            end = dt.combine(end, dt_time(12, 0))

            cycst = anualcycle.split('-')[0]
            cycen = anualcycle.split('-')[1]
            reference = [
                dt.strptime(cycst, '%Y%m%d'),
                dt.strptime(cycen, '%Y%m%d')
            ]
            LOGGER.debug('Reference start: %s , end: %s ' %
                         (reference[0], reference[1]))

            reference[0] = dt.combine(reference[0], dt_time(12, 0))
            reference[1] = dt.combine(reference[1], dt_time(12, 0))
            LOGGER.debug('New Reference start: %s , end: %s ' %
                         (reference[0], reference[1]))

            # Check if 360_day calendar (all months are exactly 30 days):
            try:
                modcal, calunits = get_calendar(resource[0])
                if '360_day' in modcal:
                    if start.day == 31:
                        start = start.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (start))
                    if end.day == 31:
                        end = end.replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' % (end))
                    if reference[0].day == 31:
                        reference[0] = reference[0].replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' %
                                     (reference[0]))
                    if reference[1].day == 31:
                        reference[1] = reference[1].replace(day=30)
                        LOGGER.debug('Date has been changed for: %s' %
                                     (reference[1]))
            except Exception:
                LOGGER.debug('Could not detect calendar')

            LOGGER.debug('start: %s , end: %s ', start, end)
            LOGGER.info('bbox %s', bbox)
            LOGGER.info('period %s', period)
            LOGGER.info('season %s', season)
        except Exception as e:
            msg = 'failed to read in the arguments: %s' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ############################################################
        # get the required bbox and time region from resource data
        ############################################################
        response.update_status('start subsetting', 30)

        from blackswan.ocgis_module import call
        from blackswan.utils import get_variable, get_timerange
        time_range = [start, end]

        tmp_resource = []
        for re in resource:
            s, e = get_timerange(re)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            if ((tmpSt <= end) and (tmpEn >= start)):
                tmp_resource.append(re)
                LOGGER.debug('Selected file: %s ' % (re))
        resource = tmp_resource

        # Here starts the workaround with z levels and regridding...
        # Otherwise the call gives a memory error for hi-res models like MIROC4h
        # TODO: add level and domain selection as in wps_analogs_model for 4D variables.

        variable = get_variable(resource)

        from blackswan.datafetch import reanalyses
        import uuid
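        # one year of NCEP slp is fetched here only to serve as the target
        # grid for the regridding below (inferred from regrid_destination)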
        ref_var = 'slp'
        refR = 'NCEP'
        ref_rea = reanalyses(start=2014,
                             end=2014,
                             variable=ref_var,
                             dataset=refR)

        regr_res = []
        for z in resource:
            tmp_n = 'tmp_%s' % (uuid.uuid1())

            s, e = get_timerange(z)
            tmpSt = dt.strptime(s, '%Y%m%d')
            tmpEn = dt.strptime(e, '%Y%m%d')
            tmpSt = dt.combine(tmpSt, dt_time(0, 0))
            tmpEn = dt.combine(tmpEn, dt_time(23, 0))

            if ((tmpSt <= start) and (tmpEn >= end)):
                LOGGER.debug('Resource contains full record, selecting : %s ' %
                             (time_range))
                full_res = call(z, variable=variable, time_range=time_range)
            else:
                full_res = z

            LOGGER.debug(
                'The subset from the big model file, or initial file: %s ' %
                (full_res))

            # TODO: regridding is needed here (???)
            # Check how to manage one big file with geopotential
            # TODO: adapt to work with levels for geopotential
            # (check how it is done in the reanalyses process)

            # b0=call(resource=z, variable=variable,
            b0 = call(resource=full_res,
                      variable=variable,
                      spatial_wrapping='wrap',
                      cdover='system',
                      regrid_destination=ref_rea[0],
                      regrid_options='bil',
                      prefix=tmp_n)

            # TODO: Use cdo regrid outside call - before call...
            # Some issues with produced ocgis file inside ocgis_module cdo regrid:
            # cdo remapbil (Abort): Unsupported projection coordinates (Variable: psl)!

            # select domain
            b01 = call(resource=b0,
                       geom=bbox,
                       spatial_wrapping='wrap',
                       prefix='levregr_' + path.basename(z)[0:-3])
            tbr = 'rm -f %s' % (b0)
            system(tbr)
            tbr = 'rm -f %s.nc' % (tmp_n)
            system(tbr)
            # get full resource
            regr_res.append(b01)

        model_subset = call(regr_res, time_range=time_range)

        # TODO: CHANGE to cross-platform
        for i in regr_res:
            tbr = 'rm -f %s' % (i)
            system(tbr)

        # Get domain
        # from cdo import Cdo
        # from os import environ
        # cdo = Cdo(env=environ)

        # regr_res = []
        # for res_fn in resource:
        #    tmp_f = 'dom_' + path.basename(res_fn)
        #    comcdo = '%s,%s,%s,%s' % (bbox[0],bbox[2],bbox[1],bbox[3])
        #    cdo.sellonlatbox(comcdo, input=res_fn, output=tmp_f)
        #    # tmp_f = call(resource=res_fn, geom=bbox, spatial_wrapping='wrap', prefix=tmp_f)
        #    regr_res.append(tmp_f)
        #    LOGGER.debug('File with selected domain: %s ' % (tmp_f))

        # model_subset = call(
        #    # resource=resource, variable=variable,
        #    resource=regr_res, variable=variable,
        #    geom=bbox, spatial_wrapping='wrap', time_range=time_range,  # conform_units_to=conform_units_to
        # )

        LOGGER.info('Dataset subset done: %s ' % model_subset)
        response.update_status('dataset subsetted', 40)

        #####################
        # computing anomalies
        #####################

        response.update_status('computing anomalies ', 50)

        model_anomal = wr.get_anomalies(model_subset,
                                        reference=reference,
                                        method=method,
                                        sseas=sseas)

        ###################
        # extracting season
        ####################
        model_season = wr.get_season(model_anomal, season=season)
        response.update_status('values normalized', 60)

        ####################
        # call the R scripts
        ####################
        response.update_status('Start weather regime clustering ', 70)
        import shlex
        import subprocess
        from blackswan import config
        from os.path import curdir, exists, join

        try:
            # rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_model.R'

            infile = model_season  # model_subset #model_ponderate
            # modelname = 'MODEL'
            # yr1 = start.year
            # yr2 = end.year
            ip, output_graphics = mkstemp(dir=curdir, suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
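            # note: mkstemp returns an open file descriptor; it is discarded here
            # and the files are written by the R script via their paths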

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % infile,
                '%s' % variable,
                '%s' % output_graphics,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL',
                '%s' % kappa
            ]
            LOGGER.info('R call built')
            LOGGER.debug('ARGS: %s' % (args))
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            LOGGER.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            # ,shell=True
            LOGGER.info('R stdout:\n %s ' % output)
            LOGGER.debug('R stderr:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 80)
            else:
                LOGGER.error('no output returned from the R call')
        except Exception as e:
            msg = 'weatherregime in R failed %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)
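        # A tighter alternative would be subprocess.check_output, which raises
        # CalledProcessError on a non-zero exit status (sketch, not used here):
        # output = subprocess.check_output(args, stderr=subprocess.STDOUT)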

        response.update_status('Weather regime clustering done ', 90)
        ############################################
        # set the outputs
        ############################################
        # response.update_status('Set the process outputs ', 95)
        response.outputs['Routput_graphic'].file = output_graphics
        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_season
        response.update_status('done', 100)
        return response
Exemple #19
0
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        LOGGER.info('Start process')
        response.update_status('execution started at : {}'.format(dt.now()), 5)

        process_start_time = time.time()  # measure process execution time ...
        start_time = time.time()  # measure init ...

        ################################
        # reading in the input arguments
        ################################

        try:
            response.update_status('read input parameter : %s ' % dt.now(), 6)

            refSt = request.inputs['refSt'][0].data
            refEn = request.inputs['refEn'][0].data
            dateSt = request.inputs['dateSt'][0].data
            dateEn = request.inputs['dateEn'][0].data

            seasonwin = request.inputs['seasonwin'][0].data
            nanalog = request.inputs['nanalog'][0].data

            bboxDef = '-20,40,30,70'  # in general format: minlon, maxlon, minlat, maxlat

            bbox = []
            bboxStr = request.inputs['BBox'][0].data
            LOGGER.debug('BBOX selected by user: %s ' % (bboxStr))
            bboxStr = bboxStr.split(',')

            # Check for invalid coordinates and apply the default if necessary
            if (abs(float(bboxStr[0])) > 180 or
                    abs(float(bboxStr[1])) > 180 or
                    abs(float(bboxStr[2])) > 90 or
                    abs(float(bboxStr[3])) > 90):
                bboxStr = bboxDef  # request.inputs['BBox'].default  # .default doesn't work anymore!!!
                LOGGER.debug('BBOX is out of range, using default instead: %s ' % (bboxStr))
                bboxStr = bboxStr.split(',')

            # reorder to the ocgis/geom order: [minlon, minlat, maxlon, maxlat]
            bbox.append(float(bboxStr[0]))
            bbox.append(float(bboxStr[2]))
            bbox.append(float(bboxStr[1]))
            bbox.append(float(bboxStr[3]))
            LOGGER.debug('BBOX for ocgis: %s ' % (bbox))
            LOGGER.debug('BBOX original: %s ' % (bboxStr))

            plot = request.inputs['plot'][0].data
            distance = request.inputs['dist'][0].data
            outformat = request.inputs['outformat'][0].data
            timewin = request.inputs['timewin'][0].data

            model_var = request.inputs['reanalyses'][0].data
            model, var = model_var.split('_')
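            # e.g. a value like 'NCEP_slp' would give model 'NCEP', var 'slp'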

            ref_model_var = request.inputs['Refreanalyses'][0].data
            ref_model, ref_var = ref_model_var.split('_')

            LOGGER.info('input parameters set')
            response.update_status('Read in and convert the arguments', 7)
        except Exception as e:
            msg = 'failed to read input parameter %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ######################################
        # convert types and set environment
        ######################################
        try:
            response.update_status('Preparing environment, converting arguments', 8)
            LOGGER.debug('date: %s %s %s %s ' % (type(refSt), refEn, dateSt, dateEn))

            # this process runs with normalize == 'None', so no seasonal cycle correction
            seacyc = False

            if outformat == 'ascii':
                outformat = '.txt'
            elif outformat == 'netCDF':
                outformat = '.nc'
            else:
                LOGGER.error('output format not valid')

        except Exception as e:
            msg = 'failed to set environment %s ' % e
            LOGGER.exception(msg)
            raise Exception(msg)

        ###########################
        # set the environment
        ###########################

        response.update_status('fetching data from archive', 9)

        getlevel = False  # level extraction is done manually below, not during the fetch
        if 'z' in var:
            level = var.strip('z')  # e.g. 'z500' -> '500'
        else:
            level = None

        ##########################################
        # fetch Data from original data archive
        ##########################################

        try:
            model_nc = rl(start=dateSt.year, end=dateEn.year,
                          dataset=model, variable=var,
                          getlevel=getlevel)

            ref_model_nc = rl(start=refSt.year, end=refEn.year,
                              dataset=ref_model, variable=ref_var,
                              getlevel=getlevel)

            LOGGER.info('reanalyses data fetched')
        except Exception:
            msg = 'failed to get reanalyses data'
            LOGGER.exception(msg)
            raise Exception(msg)

        response.update_status('subsetting region of interest', 10)

        # Checking memory and dataset size
        model_size = get_files_size(model_nc)
        ref_model_size = get_files_size(ref_model_nc)

        m_size = max(model_size, ref_model_size)

        memory_avail = psutil.virtual_memory().available
        thrs = 0.2  # 20%

        # process file-by-file when the dataset is large relative to available memory
        ser_r = m_size >= thrs * memory_avail

        LOGGER.debug('Available Memory: %s ' % (memory_avail))
        LOGGER.debug('Dataset size: %s ' % (m_size))
        LOGGER.debug('Threshold: %s ' % (thrs * memory_avail))
        LOGGER.debug('Process serially (file-by-file): %s ' % (ser_r))
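        # e.g. with 8 GiB of memory available, any dataset larger than
        # 0.2 * 8 GiB = 1.6 GiB is processed file-by-file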

        # #####################################################
        # Construct descriptive filenames for the three files #
        # listed in config file                               #
        # TODO check strftime for years <1900 (!)             #
        # #####################################################

        # refDatesString = dt.strftime(refSt, '%Y-%m-%d') + "_" + dt.strftime(refEn, '%Y-%m-%d')
        # simDatesString = dt.strftime(dateSt, '%Y-%m-%d') + "_" + dt.strftime(dateEn, '%Y-%m-%d')

        # Fix the < 1900 issue: strftime may reject years before 1900, isoformat does not
        refDatesString = refSt.isoformat().strip().split("T")[0] + "_" + refEn.isoformat().strip().split("T")[0]
        simDatesString = dateSt.isoformat().strip().split("T")[0] + "_" + dateEn.isoformat().strip().split("T")[0]

        archiveNameString = "base_" + var + "_" + refDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])
        simNameString = "sim_" + var + "_" + simDatesString + '_%.1f_%.1f_%.1f_%.1f' \
                            % (bbox[0], bbox[2], bbox[1], bbox[3])

        if ('z' in var):
            # ------------------ NCEP -------------------
            tmp_total = []
            origvar = get_variable(model_nc)

            for z in model_nc:
                b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)], geom=bbox,
                          spatial_wrapping='wrap', prefix='levdom_' + os.path.basename(z)[0:-3])
                tmp_total.append(b0)

            time_range = [dateSt, dateEn]

            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

            # Clean
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)

            # Create a new variable without the (now singleton) level dimension
            ds = Dataset(inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            ds.close()
            simulation = call(inter_subset_tmp, variable='z%s' % level, prefix=simNameString)

            # ------------------ 20CRV2c -------------------
            tmp_total = []
            origvar = get_variable(ref_model_nc)

            for z in ref_model_nc:

                tmp_n = 'tmp_%s' % (uuid.uuid1())
                # select level and regrid
                b0 = call(resource=z, variable=origvar, level_range=[int(level), int(level)],
                          spatial_wrapping='wrap', cdover='system',
                          regrid_destination=model_nc[0], regrid_options='bil', prefix=tmp_n)

                # select domain
                b01 = call(resource=b0, variable=origvar, geom=bbox, spatial_wrapping='wrap',
                           prefix='levregr_' + os.path.basename(z)[0:-3])
                tbr = 'rm -f %s' % (b0)
                os.system(tbr)
                tbr = 'rm -f %s.nc' % (tmp_n)
                os.system(tbr)

                tmp_total.append(b01)

            time_range = [refSt, refEn]

            tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
            ref_inter_subset_tmp = call(resource=tmp_total, variable=origvar, time_range=time_range)

            # Clean
            for i in tmp_total:
                tbr = 'rm -f %s' % (i)
                os.system(tbr)

            # Create a new variable without the (now singleton) level dimension
            ds = Dataset(ref_inter_subset_tmp, mode='a')
            z_var = ds.variables.pop(origvar)
            dims = z_var.dimensions
            new_var = ds.createVariable('z%s' % level, z_var.dtype, dimensions=(dims[0], dims[2], dims[3]))
            new_var[:, :, :] = squeeze(z_var[:, 0, :, :])
            ds.close()
            archive = call(ref_inter_subset_tmp, variable='z%s' % level, prefix=archiveNameString)

        else:
            if ser_r:
                LOGGER.debug('Process reanalysis step-by-step')
                # ----- NCEP ------
                tmp_total = []
                for z in model_nc:
                    b0 = call(resource=z, variable=var, geom=bbox, spatial_wrapping='wrap',
                              prefix='Rdom_' + os.path.basename(z)[0:-3])
                    tmp_total.append(b0)

                tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
                simulation = call(resource=tmp_total, variable=var, time_range=[dateSt, dateEn], prefix=simNameString)

                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    os.system(tbr)

                # ----- 20CRV2c ------
                tmp_n = 'tmp_%s' % (uuid.uuid1())
                tmp_total = []
                for z in ref_model_nc:
                    # regrid
                    b0 = call(resource=z, variable=ref_var, spatial_wrapping='wrap', cdover='system',
                              regrid_destination=model_nc[0], regrid_options='bil', prefix=tmp_n)
                    # select domain
                    b01 = call(resource=b0, variable=ref_var, geom=bbox, spatial_wrapping='wrap',
                               prefix='ref_Rdom_' + os.path.basename(z)[0:-3])

                    tbr = 'rm -f %s' % (b0)
                    os.system(tbr)
                    tbr = 'rm -f %s.nc' % (tmp_n)
                    os.system(tbr)

                    tmp_total.append(b01)

                tmp_total = sorted(tmp_total, key=lambda i: os.path.splitext(os.path.basename(i))[0])
                archive = call(resource=tmp_total, variable=ref_var, time_range=[refSt, refEn], prefix=archiveNameString)
                # Clean
                for i in tmp_total:
                    tbr = 'rm -f %s' % (i)
                    os.system(tbr)
            else:
                LOGGER.debug('Using whole dataset at once')

                simulation = call(resource=model_nc, variable=var, geom=bbox, spatial_wrapping='wrap',
                                  time_range=[dateSt, dateEn], prefix=simNameString)

                ref_inter_subset_tmp = call(resource=ref_model_nc, variable=ref_var, spatial_wrapping='wrap',
                                            cdover='system', regrid_destination=model_nc[0], regrid_options='bil')

                archive = call(resource=ref_inter_subset_tmp, geom=bbox, spatial_wrapping='wrap',
                               time_range=[refSt, refEn], prefix=archiveNameString)

        response.update_status('datasets subsetted', 15)

        LOGGER.debug("get_input_subset_dataset took %s seconds.",
                     time.time() - start_time)
        response.update_status('**** Input data fetched', 20)

        ########################
        # input data preparation
        ########################
        response.update_status('Start preparing input data', 30)
        start_time = time.time()  # measure data preparation ...

        LOGGER.info('archive and simulation files generated: %s, %s'
                    % (archive, simulation))

        # Rename variable (TODO: for this specific process the names are known: slp and prmsl...)
        try:
            if level is not None:
                out_var = 'z%s' % level
            else:
                var_archive = get_variable(archive)
                var_simulation = get_variable(simulation)
                if var_archive != var_simulation:
                    rename_variable(archive, oldname=var_archive, newname=var_simulation)
                    LOGGER.info('varname %s in netCDF renamed to %s' % (var_archive, var_simulation))
                # out_var is needed below even when no renaming was necessary
                out_var = var_simulation
        except:
            msg = 'failed to rename variable in target files'
            LOGGER.exception(msg)
            raise Exception(msg)

        # seacyc is False, so no seasonal cycle files are needed
        seasoncyc_base = seasoncyc_sim = None

        output_file = 'output.txt'
        files = [os.path.abspath(archive), os.path.abspath(simulation), output_file]
        LOGGER.debug("Data preperation took %s seconds.",
                     time.time() - start_time)

        ############################
        # generate the config file
        ############################
        config_file = analogs.get_configfile(
            files=files,
            seasoncyc_base=seasoncyc_base,
            seasoncyc_sim=seasoncyc_sim,
            base_id=ref_model,
            sim_id=model,
            timewin=timewin,
            varname=out_var,
            seacyc=seacyc,
            cycsmooth=91,
            nanalog=nanalog,
            seasonwin=seasonwin,
            distfun=distance,
            outformat=outformat,
            calccor=True,
            silent=False,
            # period=[dt.strftime(refSt, '%Y-%m-%d'), dt.strftime(refEn, '%Y-%m-%d')],
            period=[refSt.isoformat().strip().split("T")[0], refEn.isoformat().strip().split("T")[0]],
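            # bbox was reordered to [minlon, minlat, maxlon, maxlat] above;
            # the format below restores the original minlon,maxlon,minlat,maxlat order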
            bbox="{0[0]},{0[2]},{0[1]},{0[3]}".format(bbox))
        response.update_status('generated config file', 40)
        #######################
        # CASTf90 call
        #######################
        start_time = time.time()  # measure call castf90

        # -----------------------
        try:
            import ctypes
            # TODO: this library is Linux-only
            mkl_rt = ctypes.CDLL('libmkl_rt.so')
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('Current number of threads: %s' % (nth))
            mkl_rt.mkl_set_num_threads(ctypes.byref(ctypes.c_int(64)))
            nth = mkl_rt.mkl_get_max_threads()
            LOGGER.debug('NEW number of threads: %s' % (nth))
            # TODO: does the environment setting below work with the default shell=False in subprocess?
            os.environ['MKL_NUM_THREADS'] = str(nth)
            os.environ['OMP_NUM_THREADS'] = str(nth)
        except Exception as e:
            msg = 'Failed to set THREADS %s ' % e
            LOGGER.debug(msg)
        # -----------------------

        # ##### TEMPORARY WORKAROUND! With hdf5-1.8.18 installed in anaconda ############
        # ##### MUST be removed after castf90 is recompiled against the latest hdf5 version
        # ##### NOT safe
        os.environ['HDF5_DISABLE_VERSION_CHECK'] = '1'
        # hdflib = os.path.expanduser("~") + '/anaconda/lib'
        # hdflib = os.getenv("HOME") + '/anaconda/lib'
        import pwd
        hdflib = pwd.getpwuid(os.getuid()).pw_dir + '/anaconda/lib'
        os.environ['LD_LIBRARY_PATH'] = hdflib
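        # Note: these os.environ changes are inherited by the analogue.out
        # subprocess launched below, which is what this workaround relies on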
        # ################################################################################

        response.update_status('Start CASTf90 call', 50)
        try:
            # response.update_status('execution of CASTf90', 50)
            cmd = ['analogue.out', config_file]
            LOGGER.debug("castf90 command: %s", cmd)
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
            LOGGER.info('analogue output:\n %s', output)
            response.update_status('**** CASTf90 succeeded', 60)
        except subprocess.CalledProcessError as e:
            msg = 'CASTf90 failed:\n{0}'.format(e.output)
            LOGGER.exception(msg)
            raise Exception(msg)
        LOGGER.debug("castf90 took %s seconds.", time.time() - start_time)

        # TODO: add try/except around the pdf plotting
        if plot == 'Yes':
            analogs_pdf = analogs.plot_analogs(configfile=config_file)
        else:
            # touch an empty placeholder pdf
            analogs_pdf = 'dummy_plot.pdf'
            with open(analogs_pdf, 'a'):
                os.utime(analogs_pdf, None)
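            # On Python 3, the same placeholder touch could be written with pathlib (sketch):
            # from pathlib import Path
            # Path(analogs_pdf).touch()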

        response.update_status('preparing output', 70)

        response.outputs['analog_pdf'].file = analogs_pdf
        response.outputs['config'].file = config_file
        response.outputs['analogs'].file = output_file

        ########################
        # generate analog viewer
        ########################

        formated_analogs_file = analogs.reformat_analogs(output_file)
        # response.outputs['formated_analogs'].storage = FileStorage()
        response.outputs['formated_analogs'].file = formated_analogs_file
        LOGGER.info('analogs reformatted')
        response.update_status('reformatted analog file', 80)

        viewer_html = analogs.render_viewer(
            # configfile=response.outputs['config'].get_url(),
            configfile=config_file,
            # datafile=response.outputs['formated_analogs'].get_url())
            datafile=formated_analogs_file)
        response.outputs['output'].file = viewer_html
        response.update_status('Successfully generated analogs viewer', 90)
        LOGGER.info('rendered pages: %s ', viewer_html)

        response.update_status('execution ended', 100)
        LOGGER.debug("total execution took %s seconds.",
                     time.time() - process_start_time)
        return response