Example #1
def test_download_with_cache():
    filename = utils.download(TESTDATA['cmip5_tasmax_2006_nc'], cache=paths.cache)
    assert os.path.basename(filename) == 'tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc'
Example #2
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP'):
  """
  Fetches reanalysis data (NCEP, 20CRV2 or 20CRV2c) to the local file system.
  :param start: int for start year to fetch source data
  :param end: int for end year to fetch source data (if None, the current year is used)
  :param variable: variable name (default='slp'); geopotential height is given as e.g. z700
  :param dataset: default='NCEP'
  :return: list of local netCDF file paths
  """
  try:
    from datetime import datetime as dt

    if end is None:
      end = dt.now().year
    obs_data = []

    if start is None:
      if dataset == 'NCEP':
        start = 1948
      if dataset == '20CR':
        start = 1851
    logger.info('start / end date set')
  except Exception as e:
    msg = "failed to set start/end dates: %s" % e
    logger.debug(msg)
    raise Exception(msg)

  if 'z' in variable:
    level = variable.strip('z')
  else:
    level = None

  logger.info('level: %s' % level)

  try:
    for year in range(start, end + 1):
      try:
        if dataset == 'NCEP':
          if variable == 'slp':
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (variable, year)
          elif 'z' in variable:
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % ( year)
        elif dataset == '20CRV2':
          if variable == 'prmsl':
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year
          if 'z' in variable:
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % ( year )
        elif dataset == '20CRV2c':
          if variable == 'prmsl':
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year
          if 'z' in variable:
            url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % ( year )     
        else:
          msg = 'Dataset %s not known' % dataset
          logger.error(msg)
          raise Exception(msg)
      except Exception as e:
        msg = "could not set url: %s " % e
        logger.debug(msg)
        raise Exception(msg)
      
      try:  
        obs_data.append(utils.download(url, cache=True))
      except Exception as e:
        msg = "download failed on {0}: {1}".format(url, e)
        logger.debug(msg)
        raise Exception(msg)

    logger.info('Reanalyses data fetched for %s files' % len(obs_data))
  except Exception as e:
    msg = "get reanalyses module failed to fetch data %s " % e
    logger.debug(msg)
    raise Exception(msg)

  if level is None:
    data = obs_data
  else:
    logger.info('get level: %s' % level)
    data = get_level(obs_data, level=level)
  return data
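A minimal usage sketch for the helper above (hypothetical values; it assumes the module-level names `utils`, `logger` and `get_level` that the snippet relies on are importable, e.g. from flyingpigeon):

# fetch two years of daily 700 hPa geopotential height from NCEP;
# reanalyses() returns a list of local .nc paths (one file per year),
# reduced to the 700 hPa level via get_level()
nc_files = reanalyses(start=2010, end=2011, variable='z700', dataset='NCEP')
for nc in nc_files:
    print(nc)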
Example #3
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'
        response.update_status('Start process', 0)

        try:
            LOGGER.info('reading the arguments')
            resources = archiveextract(
                resource=rename_complexinputs(request.inputs['resource']))
            period = request.inputs['period']
            period = period[0].data
            indices = [inpt.data for inpt in request.inputs['indices']]
            archive_format = request.inputs['archive_format'][0].data
            LOGGER.info(
                "all arguments read; number of files in resources: {}".format(
                    len(resources)))
        except Exception as ex:
            msg = 'failed to read in the arguments: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            gbif_url = request.inputs['gbif'][0].data
            csv_file = download(gbif_url)
            LOGGER.info('CSV file fetched successfully: %s' % csv_file)
        except Exception as ex:
            msg = 'failed to fetch GBIF file: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            response.update_status('read in latlon coordinates', 10)
            latlon = sdm.latlon_gbifcsv(csv_file)
            LOGGER.info('got occurrence coordinates from %s ' % csv_file)
        except Exception as ex:
            msg = 'failed to extract the latlon points from file {}: {}'.format(
                csv_file, str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            response.update_status('plot map', 20)
            occurence_map = map_gbifoccurrences(latlon)
            LOGGER.info('GBIF occurrence plotted')
        except Exception as ex:
            msg = 'failed to plot occurrence map: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        #################################
        # calculate the climate indices
        #################################

        # get the indices
        try:
            response.update_status('start calculation of indices', 30)
            ncs_indices = sdm.get_indices(resource=resources, indices=indices)
            LOGGER.info('index calculation done')
        except Exception as ex:
            msg = 'failed to calculate indices: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            # sort indices
            indices_dic = sdm.sort_indices(ncs_indices)
            LOGGER.info('index files sorted into dictionary')
        except Exception as ex:
            msg = 'failed to sort indices: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        ncs_references = []
        species_files = []
        stat_infos = []
        PAmask_pngs = []

        response.update_status('Start processing for {} datasets'.format(
            len(indices_dic.keys())))
        for count, key in enumerate(indices_dic.keys()):
            try:
                status_nr = 40 + count * 10
                response.update_status('Start processing of {}'.format(key),
                                       status_nr)

                ncs = indices_dic[key]
                LOGGER.info('with {} files'.format(len(ncs)))

                try:
                    response.update_status('generating the PA mask', status_nr + 2)
                    PAmask = sdm.get_PAmask(coordinates=latlon, nc=ncs[0])
                    LOGGER.info('PA mask successfully generated')
                except Exception as ex:
                    msg = 'failed to generate the PA mask: {}'.format(str(ex))
                    LOGGER.exception(msg)
                    raise Exception(msg)

                try:
                    response.update_status('Plotting PA mask', status_nr + 3)
                    PAmask_pngs.extend([map_PAmask(PAmask)])
                except Exception as ex:
                    msg = 'failed to plot the PA mask: {}'.format(str(ex))
                    LOGGER.exception(msg)
                    raise Exception(msg)

                try:
                    ncs_reference = sdm.get_reference(ncs_indices=ncs,
                                                      period=period)
                    ncs_references.extend(ncs_reference)
                    LOGGER.info('reference indices calculated {}'.format(
                        ncs_references))
                except Exception as ex:
                    msg = 'failed to calculate the reference: {}'.format(
                        str(ex))
                    LOGGER.exception(msg)
                    raise Exception(msg)

                try:
                    gam_model, predict_gam, gam_info = sdm.get_gam(
                        ncs_reference, PAmask)
                    stat_infos.append(gam_info)
                    response.update_status('GAM successfully trained',
                                           status_nr + 5)
                except Exception as ex:
                    msg = 'failed to train GAM for {}: {}'.format(key, str(ex))
                    LOGGER.debug(msg)
                    raise Exception(msg)

                try:
                    prediction = sdm.get_prediction(gam_model, ncs)
                    response.update_status('prediction done', status_nr + 7)
                except Exception as ex:
                    msg = 'failed to predict tree occurrence: {}'.format(
                        str(ex))
                    LOGGER.exception(msg)
                    raise Exception(msg)
                #
                # try:
                #     response.update_status('land sea mask for predicted data',  status_nr + 8)
                #     from numpy import invert, isnan, nan, broadcast_arrays  # , array, zeros, linspace, meshgrid
                #     mask = invert(isnan(PAmask))
                #     mask = broadcast_arrays(prediction, mask)[1]
                #     prediction[mask is False] = nan
                # except:
                #     LOGGER.debug('failed to mask predicted data')

                try:
                    species_files.append(sdm.write_to_file(ncs[0], prediction))
                    LOGGER.info('Favourability written to file')
                except Exception as ex:
                    msg = 'failed to write species file: {}'.format(str(ex))
                    LOGGER.debug(msg)
                    raise Exception(msg)

            except Exception as ex:
                msg = 'failed to process SDM chain for {} : {}'.format(
                    key, str(ex))
                LOGGER.exception(msg)
                raise Exception(msg)

        try:
            archive_indices = archive(ncs_indices, format=archive_format)
            LOGGER.info('indices added to archive')
        except Exception as ex:
            msg = 'failed adding indices to archive: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            archive_references = archive(ncs_references, format=archive_format)
            LOGGER.info('indices reference added to archive')
        except Exception as ex:
            msg = 'failed adding reference indices to archive: {}'.format(
                str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            archive_prediction = archive(species_files, format=archive_format)
            LOGGER.info('species_files added to archive')
        except Exception as ex:
            msg = 'failed adding species_files indices to archive: {}'.format(
                str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        try:
            stat_infosconcat = pdfmerge(stat_infos)
            LOGGER.debug('pngs {}'.format(PAmask_pngs))
            PAmask_png = concat_images(PAmask_pngs, orientation='h')
            LOGGER.info('stat infos pdfs and mask pngs merged')
        except Exception as ex:
            msg = 'failed to concat images: {}'.format(str(ex))
            LOGGER.exception(msg)
            raise Exception(msg)

        # self.output_csv.setValue(csv_file)
        response.outputs['output_gbif'].file = occurence_map
        response.outputs['output_PA'].file = PAmask_png
        response.outputs['output_indices'].file = archive_indices
        response.outputs['output_reference'].file = archive_references
        response.outputs['output_prediction'].file = archive_prediction
        response.outputs['output_info'].file = stat_infosconcat

        response.update_status('done', 100)
        return response
Example #4
    def execute(self):
        logger.info('Start process')

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp

        ################################
        # reading in the input arguments
        ################################
        try:
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(
                identifier='netCDF')  # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e:
            logger.debug('failed to read in the arguments %s ' % e)

        try:
            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])

            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e:
            logger.debug('failed to convert arguments %s ' % e)

        ############################
        # fetching training data
        ############################

        from flyingpigeon.utils import download, get_time
        from os.path import abspath

        try:
            dat = abspath(download(url_dat))
            Rdat = abspath(download(url_Rdat))
            logger.info('training data fetched')
        except Exception as e:
            logger.error('failed to fetch training data %s' % e)

        ##########################################################
        # get the required bbox and time region from resource data
        ##########################################################
        # from flyingpigeon.weatherregimes import get_level

        from flyingpigeon.ocgis_module import call
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file,
                regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' %
                        model_subset)
        else:
            model_subset = call(
                resource=resource,
                variable=variable,
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
            )
            logger.info('Dataset time period extracted: %s ' % model_subset)

        #######################
        # computing anomalies
        #######################

        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]  # end of the annual-cycle reference period
        reference = [
            dt.strptime(cycst, '%Y%m%d'),
            dt.strptime(cycen, '%Y%m%d')
        ]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        # extracting season
        #####################

        model_season = wr.get_season(model_anomal, season=season)

        #######################
        # call the R scripts
        #######################

        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_projection.R'

            yr1 = start.year
            yr2 = end.year
            time = get_time(model_season, format='%Y%m%d')

            # ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
            ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

            args = [
                'Rscript',
                join(Rsrc, Rfile),
                '%s/' % curdir,
                '%s/' % Rsrc,
                '%s' % model_season,
                '%s' % variable,
                '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                # '%s' % output_graphics,
                '%s' % dat,
                '%s' % Rdat,
                '%s' % file_pca,
                '%s' % file_class,
                '%s' % output_frec,
                '%s' % season,
                '%s' % start.year,
                '%s' % end.year,
                '%s' % 'MODEL'
            ]

            logger.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            logger.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(
                args, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE).communicate()
            # , shell=True
            logger.info('R outlog info:\n %s ' % output)
            logger.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                self.status.set('**** weatherregime in R succeeded', 90)
            else:
                logger.error('no output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R %s ' % e
            logger.error(msg)
            raise Exception(msg)

        #################
        # set the outputs
        #################

        # self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue(file_pca)
        self.output_classification.setValue(file_class)
        self.output_netcdf.setValue(model_season)
        self.output_frequency.setValue(output_frec)
Example #5
def reanalyses(start=1948,
               end=None,
               variable='slp',
               dataset='NCEP',
               timres='day',
               getlevel=True):
    """
    Fetches the reanalysis data (NCEP, 20CR or ERA_20C) to local file system
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, current year will be the end)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return list: list of path/files.nc
    """
    # used for NETCDF conversion
    from netCDF4 import Dataset
    from os import path, system
    from flyingpigeon.ocgis_module import call
    from shutil import move
    # used for NETCDF conversion

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except Exception:
        msg = "failed to set start/end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' %
                         (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (
                            variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (
                                year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/Dailies/pressure/hgt.%s.nc' % (
                                year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        if timres == '6h':
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (
                                year)  # noqa
                        else:
                            url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/Dailies/pressure/hgt.%s.nc' % (
                                year)  # noqa
                else:
                    msg = 'Dataset %s not known' % dataset
                    LOGGER.error(msg)
                    raise Exception(msg)
                LOGGER.debug('url: %s' % url)
            except Exception:
                msg = "could not set url"
                LOGGER.exception(msg)
                raise Exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    ds = Dataset(df)
                    df_time = ds.variables['time']
                    # Here, need to check not just calendar, but that file is ncdf_classic already...
                    if not hasattr(df_time, 'calendar'):
                        p, f = path.split(path.abspath(df))
                        LOGGER.debug("path = %s , file %s " % (p, f))
                        # May be an issue if several users are working at the same time
                        move(df, f)
                        conv = call(resource=f,
                                    output_format_options={
                                        'data_model': 'NETCDF4_CLASSIC'
                                    },
                                    dir_output=p,
                                    prefix=f.replace('.nc', ''))
                        obs_data.append(conv)
                        LOGGER.debug('file %s to NETCDF4_CLASSIC converted' %
                                     conv)
                        # Cleaning, could be 50gb... for each (!) user
                        # TODO Check how links work
                        cmdrm = 'rm -f %s' % (f)
                        system(cmdrm)
                    else:
                        obs_data.append(df)
                    ds.close()
                except Exception:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except Exception:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except Exception:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if level is None or not getlevel:
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
Example #6
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP'):
    """
    Fetches reanalysis data (NCEP, 20CRV2 or 20CRV2c) to the local file system.
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, the current year is used)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return: list of local netCDF file paths
    """
    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        logger.info('start / end date set')
    except Exception as e:
        msg = "get_OBS module failed to get start end dates %s " % e
        logger.debug(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    logger.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (
                            variable, year)
                    elif 'z' in variable:
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (
                            year)
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year
                    if 'z' in variable:
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (
                            year)
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year
                    if 'z' in variable:
                        url = 'http://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (
                            year)
                else:
                    msg = 'Dataset %s not known' % dataset
                    logger.error(msg)
                    raise Exception(msg)
            except Exception as e:
                msg = "could not set url: %s " % e
                logger.debug(msg)
                raise Exception(msg)

            try:
                obs_data.append(utils.download(url, cache=True))
            except Exception as e:
                msg = "download failed on {0}: {1}".format(url, e)
                logger.debug(msg)
                raise Exception(msg)

        logger.info('Reanalyses data fetched for %s files' % len(obs_data))
    except Exception as e:
        msg = "get reanalyses module failed to fetch data %s " % e
        logger.debug(msg)
        raise Exception(msg)

    if level is None:
        data = obs_data
    else:
        logger.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
Example #7
def reanalyses(start=1948, end=None, variable='slp', dataset='NCEP'):
    """
    Fetches reanalysis data (NCEP, 20CRV2 or 20CRV2c) to the local file system.
    :param start: int for start year to fetch source data
    :param end: int for end year to fetch source data (if None, the current year is used)
    :param variable: variable name (default='slp'), geopotential height is given as e.g. z700
    :param dataset: default='NCEP'
    :return: list of local netCDF file paths
    """
    # used for NETCDF conversion
    from os import path
    from flyingpigeon.ocgis_module import call
    from shutil import move
    # used for NETCDF conversion

    try:
        from datetime import datetime as dt

        if end is None:
            end = dt.now().year
        obs_data = []

        if start is None:
            if dataset == 'NCEP':
                start = 1948
            if dataset == '20CR':
                start = 1851
        LOGGER.info('start / end date set')
    except Exception:
        msg = "failed to set start/end dates"
        LOGGER.exception(msg)
        raise Exception(msg)

    if 'z' in variable:
        level = variable.strip('z')
    else:
        level = None

    LOGGER.info('level: %s' % level)

    try:
        for year in range(start, end + 1):
            LOGGER.debug('fetching single file for %s year %s ' %
                         (dataset, year))
            try:
                if dataset == 'NCEP':
                    if variable == 'slp':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (
                            variable, year)  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2/pressure/hgt.%s.nc' % (
                            year)  # noqa
                elif dataset == '20CRV2c':
                    if variable == 'prmsl':
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/monolevel/prmsl.%s.nc' % year  # noqa
                    if 'z' in variable:
                        url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/20thC_ReanV2c/pressure/hgt.%s.nc' % (
                            year)  # noqa
                else:
                    msg = 'Dataset %s not known' % dataset
                    LOGGER.error(msg)
                    raise Exception(msg)
                LOGGER.debug('url: %s' % url)
            except Exception:
                msg = "could not set url"
                LOGGER.exception(msg)
                raise Exception(msg)
            try:
                df = utils.download(url, cache=True)
                LOGGER.debug('single file fetched %s ' % year)
                # convert to NETCDF4_CLASSIC
                try:
                    p, f = path.split(path.abspath(df))
                    LOGGER.debug("path = %s , file %s " % (p, f))
                    move(df, f)
                    conv = call(resource=f,
                                output_format_options={
                                    'data_model': 'NETCDF4_CLASSIC'
                                },
                                dir_output=p,
                                prefix=f.replace('.nc', ''))
                    obs_data.append(conv)
                    LOGGER.debug('file %s to NETCDF4_CLASSIC converted' % conv)
                except Exception:
                    LOGGER.exception('failed to convert into NETCDF4_CLASSIC')
            except Exception:
                msg = "download failed on {0}.".format(url)
                LOGGER.exception(msg)
        LOGGER.info('Reanalyses data fetched for %s files' % len(obs_data))
    except Exception:
        msg = "get reanalyses module failed to fetch data"
        LOGGER.exception(msg)
        raise Exception(msg)

    if level is None:
        data = obs_data
    else:
        LOGGER.info('get level: %s' % level)
        data = get_level(obs_data, level=level)
    return data
Example #8
from flyingpigeon.utils import download
from ocgis import RequestDataset, OcgOperations

variable = 'slp'
year = 2000

url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/surface/%s.%s.nc' % (variable, year)

f = download(url)
rd = RequestDataset(f)
ds = OcgOperations(rd,
                   output_format='nc',
                   output_format_options={'data_model': 'NETCDF4_CLASSIC'},
                   prefix='converted_to_NETCDF_CLASSIC').execute()
print(ds)
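Example #5 above converts a file only when its time variable lacks a 'calendar' attribute; a minimal sketch of that check with netCDF4 (the filename is hypothetical):

from netCDF4 import Dataset

ds = Dataset('hgt.2000.nc')
# a missing 'calendar' attribute on the time variable is the signal used in
# Example #5 that the file still needs conversion to NETCDF4_CLASSIC
needs_conversion = not hasattr(ds.variables['time'], 'calendar')
ds.close()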
Example #9
    def execute(self):
        logger.info('Start process')
        from datetime import datetime as dt
        from flyingpigeon import weatherregimes as wr
        from tempfile import mkstemp
        
        ################################
        # reading in the input arguments
        ################################
        try: 
            resource = self.getInputValues(identifier='resource')
            url_Rdat = self.getInputValues(identifier='Rdat')[0]
            url_dat = self.getInputValues(identifier='dat')[0]
            url_ref_file = self.getInputValues(identifier='netCDF') # can be None
            season = self.getInputValues(identifier='season')[0]
            period = self.getInputValues(identifier='period')[0]            
            anualcycle = self.getInputValues(identifier='anualcycle')[0]
        except Exception as e: 
            logger.debug('failed to read in the arguments %s ' % e)
        
        try: 
            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            # kappa = int(self.getInputValues(identifier='kappa')[0])
            
            logger.info('period %s' % str(period))
            logger.info('season %s' % str(season))
            logger.info('read in the arguments')
            logger.info('url_ref_file: %s' % url_ref_file)
            logger.info('url_Rdat: %s' % url_Rdat)
            logger.info('url_dat: %s' % url_dat)
        except Exception as e: 
            logger.debug('failed to convert arguments %s ' % e)
           
        ############################
        # fetching training data
        ############################
        
        from flyingpigeon.utils import download, get_time
        from os.path import abspath
        
        try:
          dat = abspath(download(url_dat))
          Rdat = abspath(download(url_Rdat))
          logger.info('training data fetched')
        except Exception as e:
          logger.error('failed to fetch training data %s' % e)
          
        ############################################################    
        ### get the required bbox and time region from resource data
        ############################################################        
        # from flyingpigeon.weatherregimes import get_level
        
        from flyingpigeon.ocgis_module import call 
        from flyingpigeon.utils import get_variable
        time_range = [start, end]

        variable = get_variable(resource)

        if len(url_ref_file) > 0:
            ref_file = download(url_ref_file[0])  
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                regrid_destination=ref_file, regrid_options='bil')
            logger.info('Dataset subset with regridding done: %s ' % model_subset)
        else:
            model_subset = call(resource=resource, variable=variable, 
                time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                )
            logger.info('Dataset time period extracted: %s ' % model_subset)
            
        
        ##############################################
        ### computing anomalies 
        ##############################################
        
        cycst = anualcycle.split('-')[0]
        cycen = anualcycle.split('-')[1]  # end of the annual-cycle reference period
        reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
        model_anomal = wr.get_anomalies(model_subset, reference=reference)

        #####################
        ### extracting season
        #####################
        model_season = wr.get_season(model_anomal, season=season)

        #######################
        ### call the R scripts
        #######################
        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
          rworkspace = curdir
          Rsrc = config.Rsrc_dir() 
          Rfile = 'weatherregimes_projection.R'
          
          yr1 = start.year
          yr2 = end.year
          time = get_time(model_season, format='%Y%m%d')

          #ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
          ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
          ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
          ip, output_frec = mkstemp(dir=curdir, suffix='.txt')
                    
          args = ['Rscript', join(Rsrc,Rfile), '%s/' % curdir, 
                  '%s/' % Rsrc, 
                  '%s' % model_season, 
                  '%s' % variable,
                  '%s' % str(time).strip("[]").replace("'","").replace(" ",""),
            #      '%s' % output_graphics,
                  '%s' % dat, 
                  '%s' % Rdat, 
                  '%s' % file_pca,
                  '%s' % file_class, 
                  '%s' % output_frec,      
                  '%s' % season, 
                  '%s' % start.year, 
                  '%s' % end.year,                  
                  '%s' % 'MODEL']

          logger.info('R call built')
        except Exception as e: 
          msg = 'failed to build the R command %s' % e
          logger.error(msg)  
          raise Exception(msg)
        try:
          output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()  # , shell=True
          logger.info('R outlog info:\n %s ' % output)
          logger.debug('R outlog errors:\n %s ' % error)
          if len(output) > 0:            
            self.status.set('**** weatherregime in R succeeded', 90)
          else:
            logger.error('no output returned from R call')
        except Exception as e: 
          msg = 'weatherregime in R %s ' % e
          logger.error(msg)  
          raise Exception(msg)

        ############################################
        ### set the outputs
        ############################################

        #self.Routput_graphic.setValue( output_graphics )
        self.output_pca.setValue( file_pca )
        self.output_classification.setValue( file_class )
        self.output_netcdf.setValue( model_season )
        self.output_frequency.setValue( output_frec )
Example #10
    def _handler(self, request, response):
        init_process_logger('log.txt')
        response.outputs['output_log'].file = 'log.txt'

        response.update_status('execution started at : {}'.format(dt.now()), 5)

        ################################
        # reading in the input arguments
        ################################
        try:
            LOGGER.info('read in the arguments')
            # resources = self.getInputValues(identifier='resources')
            season = request.inputs['season'][0].data
            LOGGER.info('season %s', season)

            period = request.inputs['period'][0].data
            LOGGER.info('period %s', period)
            anualcycle = request.inputs['anualcycle'][0].data

            start = dt.strptime(period.split('-')[0], '%Y%m%d')
            end = dt.strptime(period.split('-')[1], '%Y%m%d')
            LOGGER.debug('start: %s , end: %s ' % (start, end))

            resource = archiveextract(resource=rename_complexinputs(request.inputs['resource']))
            # resource = archiveextract(resource=[res.file for res in request.inputs['resource']])
            url_Rdat = request.inputs['Rdat'][0].data
            url_dat = request.inputs['dat'][0].data
            url_ref_file = request.inputs['netCDF'][0].data  # can be None
            # season = self.getInputValues(identifier='season')[0]
            # period = self.getInputValues(identifier='period')[0]
            # anualcycle = self.getInputValues(identifier='anualcycle')[0]
            LOGGER.info('period %s' % str(period))
            LOGGER.info('season %s' % str(season))
            LOGGER.info('read in the arguments')
            LOGGER.info('url_ref_file: %s' % url_ref_file)
            LOGGER.info('url_Rdat: %s' % url_Rdat)
            LOGGER.info('url_dat: %s' % url_dat)
        except Exception as e:
            LOGGER.debug('failed to convert arguments %s ' % e)

        ############################
        # fetching training data
        ############################

        try:
            dat = abspath(download(url_dat))
            Rdat = abspath(download(url_Rdat))
            LOGGER.info('training data fetched')
        except Exception as e:
            LOGGER.error('failed to fetch training data %s' % e)

        ##########################################################
        # get the required bbox and time region from resource data
        ##########################################################
        # from flyingpigeon.weatherregimes import get_level
        try:
            from flyingpigeon.ocgis_module import call
            from flyingpigeon.utils import get_variable
            time_range = [start, end]

            variable = get_variable(resource)

            if len(url_ref_file) > 0:
                ref_file = download(url_ref_file)
                model_subset = call(
                    resource=resource, variable=variable,
                    time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                    regrid_destination=ref_file, regrid_options='bil')
                LOGGER.info('Dataset subset with regridding done: %s ' % model_subset)
            else:
                model_subset = call(
                    resource=resource, variable=variable,
                    time_range=time_range,  # conform_units_to=conform_units_to, geom=bbox, spatial_wrapping='wrap',
                )
                LOGGER.info('Dataset time period extracted: %s ' % model_subset)
        except Exception:
            LOGGER.exception('failed to make a data subset ')

        #######################
        # computing anomalies
        #######################
        try:
            cycst = anualcycle.split('-')[0]
            cycen = anualcycle.split('-')[1]  # end of the annual-cycle reference period
            reference = [dt.strptime(cycst, '%Y%m%d'), dt.strptime(cycen, '%Y%m%d')]
            model_anomal = wr.get_anomalies(model_subset, reference=reference)

            #####################
            # extracting season
            #####################

            model_season = wr.get_season(model_anomal, season=season)
        except Exception:
            LOGGER.exception('failed to compute anualcycle or seasons')

        #######################
        # call the R scripts
        #######################

        import shlex
        import subprocess
        from flyingpigeon import config
        from os.path import curdir, exists, join

        try:
            rworkspace = curdir
            Rsrc = config.Rsrc_dir()
            Rfile = 'weatherregimes_projection.R'

            yr1 = start.year
            yr2 = end.year
            time = get_time(model_season, format='%Y%m%d')

            # ip, output_graphics = mkstemp(dir=curdir ,suffix='.pdf')
            ip, file_pca = mkstemp(dir=curdir, suffix='.txt')
            ip, file_class = mkstemp(dir=curdir, suffix='.Rdat')
            ip, output_frec = mkstemp(dir=curdir, suffix='.txt')

            args = ['Rscript', join(Rsrc, Rfile), '%s/' % curdir,
                    '%s/' % Rsrc,
                    '%s' % model_season,
                    '%s' % variable,
                    '%s' % str(time).strip("[]").replace("'", "").replace(" ", ""),
                    # '%s' % output_graphics,
                    '%s' % dat,
                    '%s' % Rdat,
                    '%s' % file_pca,
                    '%s' % file_class,
                    '%s' % output_frec,
                    '%s' % season,
                    '%s' % start.year,
                    '%s' % end.year,
                    '%s' % 'MODEL']

            LOGGER.info('R call built')
        except Exception as e:
            msg = 'failed to build the R command %s' % e
            LOGGER.error(msg)
            raise Exception(msg)
        try:
            output, error = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
            # , shell=True
            LOGGER.info('R outlog info:\n %s ' % output)
            LOGGER.debug('R outlog errors:\n %s ' % error)
            if len(output) > 0:
                response.update_status('**** weatherregime in R succeeded', 90)
            else:
                LOGGER.error('no output returned from R call')
        except Exception as e:
            msg = 'weatherregime in R %s ' % e
            LOGGER.error(msg)
            raise Exception(msg)

        #################
        # set the outputs
        #################

        response.update_status('Set the process outputs ', 95)

        response.outputs['output_pca'].file = file_pca
        response.outputs['output_classification'].file = file_class
        response.outputs['output_netcdf'].file = model_season
        response.outputs['output_frequency'].file = output_frec

        response.update_status('done', 100)
        return response
Example #11
from flyingpigeon import utils
from ocgis import RequestDataset, OcgOperations
from ocgis.util.large_array import compute

from datetime import datetime as dt
import uuid

# years = range(1948,2018)
years = range(2010, 2012)

ncs = []
for year in years:
    url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/hgt.%s.nc' % (year)
    ncs.append(utils.download(url, cache=True))  # download() returns a single local path


level_range = [700, 700]
time_range = [dt.strptime('20100315', '%Y%m%d'), dt.strptime('20111210', '%Y%m%d')]
bbox = [-80, 20, 20, 70]


rd = RequestDataset(ncs)

ops = OcgOperations(rd,
                    time_range=time_range,
                    level_range=level_range,
                    geom=bbox,
                    output_format='nc',
                    prefix='ocgis_module_optimisation',
                    add_auxiliary_files=False)
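The snippet builds the operations but never runs them. A hedged completion: either execute directly, or use the compute() helper imported above from ocgis.util.large_array, which executes the same request tile by tile to bound memory use (the tile size below is an arbitrary choice):

# run the subset in one go ...
out_path = ops.execute()
print(out_path)

# ... or tile the computation for large requests
out_path = compute(ops, tile_dimension=10, verbose=True)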
Example #12
def test_download_with_cache():
    filename = utils.download(TESTDATA["cmip5_tasmax_2006_nc"], cache=True)
    assert os.path.basename(filename) == "tasmax_Amon_MPI-ESM-MR_rcp45_r1i1p1_200601-200612.nc"
Example #13
    def execute(self):

        init_process_logger('log.txt')
        self.output_log.setValue('log.txt')

        from os.path import basename
        from flyingpigeon import sdm
        from flyingpigeon.utils import archive, archiveextract, download
        self.status.set('Start process', 0)

        try:
            logger.info('reading the arguments')
            resources_raw = self.getInputValues(identifier='resources')
            csv_url = self.getInputValues(identifier='gbif')[0]
            period = self.getInputValues(identifier='period')
            period = period[0]
            indices = self.getInputValues(identifier='input_indices')
            archive_format = self.archive_format.getValue()
            logger.info('indices %s ' % indices)
            logger.debug('csv_url %s' % csv_url)
        except Exception as e:
            logger.error('failed to read in the arguments %s ' % e)
            raise

        try:
            logger.info('set up the environment')
            csv_file = download(csv_url)
            resources = archiveextract(resources_raw)
        except Exception as e:
            logger.error('failed to set up the environment %s ' % e)
            raise

        try:
            self.status.set('read in latlon coordinates', 10)
            latlon = sdm.latlon_gbifcsv(csv_file)
            logger.info('got occurrence coordinates from %s ' % csv_file)
        except Exception as e:
            logger.exception(
                'failed to extract the latlon points from file: %s: %s' %
                (csv_file, e))

        try:
            self.status.set('plot map', 20)
            from flyingpigeon.visualisation import map_gbifoccurrences

            # latlon = sdm.latlon_gbifdic(gbifdic)
            occurence_map = map_gbifoccurrences(latlon)
        except Exception as e:
            logger.exception('failed to plot occurence map %s' % e)

        #################################
        # calculate the climate indices
        #################################

        # get the indices
        ncs_indices = None
        try:
            self.status.set(
                'start calculation of climate indices for %s' % indices, 30)
            ncs_indices = sdm.get_indices(resources=resources, indices=indices)
            logger.info('index calculation done')
        except:
            msg = 'failed to calculate indices'
            logger.exception(msg)
            raise Exception(msg)

        try:
            self.status.set('get domain', 30)
            domains = set()
            for resource in ncs_indices:
                # get_domain works only if metadata are set in a correct way
                domains = domains.union([basename(resource).split('_')[1]])
            if len(domains) == 1:
                domain = list(domains)[0]
                logger.debug('Domain %s found in indices files' % domain)
            else:
                logger.error('Not a single domain in indices files %s' %
                             domains)
        except Exception as e:
            logger.exception('failed to get domains %s' % e)

        try:
            self.status.set('generating the PA mask', 20)
            PAmask = sdm.get_PAmask(coordinates=latlon, domain=domain)
            logger.info('PA mask successfully generated')
        except Exception as e:
            logger.exception('failed to generate the PA mask: %s' % e)

        try:
            self.status.set('Ploting PA mask', 25)
            from flyingpigeon.visualisation import map_PAmask
            PAmask_png = map_PAmask(PAmask)
        except Exception as e:
            logger.exception('failed to plot the PA mask: %s' % e)

        try:
            # sort indices
            indices_dic = None
            indices_dic = sdm.sort_indices(ncs_indices)
            logger.info('index files sorted for %s datasets' %
                        len(indices_dic.keys()))
        except:
            msg = 'failed to sort indices'
            logger.exception(msg)
            raise Exception(msg)

        ncs_references = []
        species_files = []
        stat_infos = []

        for count, key in enumerate(indices_dic.keys()):
            try:
                status_nr = 40 + count * 10
                self.status.set('Start processing of %s' % key, status_nr)
                ncs = indices_dic[key]
                logger.info('with %s files' % len(ncs))
                try:
                    ncs_reference = sdm.get_reference(ncs_indices=ncs,
                                                      period=period)
                    ncs_references.extend(ncs_reference)
                    logger.info('reference indices calculated %s ' %
                                ncs_references)
                except:
                    msg = 'failed to calculate the reference'
                    logger.exception(msg)
                    raise Exception(msg)

                try:
                    gam_model, predict_gam, gam_info = sdm.get_gam(
                        ncs_reference, PAmask)
                    stat_infos.append(gam_info)
                    self.status.set('GAM successfully trained', status_nr + 5)
                except Exception as e:
                    msg = 'failed to train GAM for %s : %s' % (key, e)
                    logger.debug(msg)

                try:
                    prediction = sdm.get_prediction(gam_model, ncs)
                    self.status.set('prediction done', status_nr + 7)
                except Exception as e:
                    msg = 'failed to predict tree occurence %s' % e
                    logger.exception(msg)
                    # raise Exception(msg)

                try:
                    self.status.set('land sea mask for predicted data',
                                    status_nr + 8)
                    from numpy import invert, isnan, nan, broadcast_arrays  # , array, zeros, linspace, meshgrid
                    mask = invert(isnan(PAmask))
                    mask = broadcast_arrays(prediction, mask)[1]
                    prediction[~mask] = nan  # set grid points outside the mask to nan
                except Exception as e:
                    logger.debug('failed to mask predicted data: %s' % e)

                try:
                    species_files.append(sdm.write_to_file(ncs[0], prediction))
                    logger.info('Favourability written to file')
                except Exception as e:
                    msg = 'failed to write species file %s' % e
                    logger.debug(msg)
                    # raise Exception(msg)

            except Exception as e:
                msg = 'failed to process SDM chain for %s: %s' % (key, e)
                logger.exception(msg)
                raise Exception(msg)

        try:
            archive_indices = None
            archive_indices = archive(ncs_indices, format=archive_format)
            logger.info('indices added to archive')
        except:
            msg = 'failed adding indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        archive_references = None
        try:
            archive_references = archive(ncs_references, format=archive_format)
            logger.info('indices reference added to archive')
        except:
            msg = 'failed adding reference indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        archive_prediction = None
        try:
            archive_prediction = archive(species_files, format=archive_format)
            logger.info('species_files added to archive')
        except:
            msg = 'failed adding species_files indices to archive'
            logger.exception(msg)
            raise Exception(msg)

        try:
            from flyingpigeon.visualisation import pdfmerge
            stat_infosconcat = pdfmerge(stat_infos)
            logger.info('stat infos pdfs merged')
        except:
            logger.exception('failed to concat images')
            _, stat_infosconcat = tempfile.mkstemp(suffix='.pdf',
                                                   prefix='foobar-',
                                                   dir='.')

        # self.output_csv.setValue(csv_file)
        self.output_gbif.setValue(occurence_map)
        self.output_PA.setValue(PAmask_png)
        self.output_indices.setValue(archive_indices)
        self.output_reference.setValue(archive_references)
        self.output_prediction.setValue(archive_prediction)
        self.output_info.setValue(stat_infosconcat)

        self.status.set('done', 100)