Example #1
    def __init__(self, **kwargs):

        # list to hold all the parameter names; will be accessed in super to 
        # construct dependency graph
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError('Required parameter %s for DamFilter not provided' % param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self,key,kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self,key,kwargs.get(key,None))
            
        # check that at least one of the two optional params has been provided
        # note that shapefile takes precedence when both are given
        if self.shapefile is None and self.extent is None:
            raise GeoEDFError('Either a shapefile path or extent needs to be provided for DamFilter')
            
        # initialize filter values array
        self.values = []

        # call super class init
        super().__init__()
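A minimal usage sketch for the kwargs-driven constructor above; shapefile and extent are the parameter names the constructor checks explicitly, and any other required parameter names would come from the class's own declarations:

    try:
        # at least one of shapefile or extent must be supplied
        dam_filter = DamFilter(shapefile='/data/conus_dams.shp')
    except GeoEDFError as e:
        print(e)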
Example #2
    def filter(self):

        # set lat-lon limits for CONUS to check whether each state falls within them
        latmin = 24
        latmax = 50
        lonmin = -125
        lonmax = -65

        # load up the TIGER/Line states shapefile
        driver = ogr.GetDriverByName('ESRI Shapefile')
        inDataset = driver.Open(self.__states_shapefile, 0)
        if inDataset is None:
            raise GeoEDFError(
                'Error opening Tiger States shapefile in CONUSStateFilter')
        inLayer = inDataset.GetLayer()

        # for each state feature check if its lat-lon falls in CONUS limits
        try:
            # loop through features in the layer and retrieve state USPS code and lat-lon
            for feature in inLayer:
                state_code = feature.GetField("STUSPS")
                state_lat = float(feature.GetField("INTPTLAT"))
                state_lon = float(feature.GetField("INTPTLON"))
                if latmin < state_lat < latmax:
                    if lonmin < state_lon < lonmax:
                        self.values.append(state_code)
        except:
            raise GeoEDFError(
                "Error processing Tiger states shapefile in CONUSStateFilter")
Example #3
    def filter(self):

        # convert the start and end dates from strings to Pandas DateTime
        try:
            # check if time is present
            if self.has_time:
                start_date = pd.to_datetime(self.start,
                                            format='%m/%d/%Y %H:%M:%S')
            else:
                start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
            if self.end is not None:
                if self.has_time:
                    end_date = pd.to_datetime(self.end,
                                              format='%m/%d/%Y %H:%M:%S')
                else:
                    end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
        except ValueError as e:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DateTimeFilter : %s'
                % e)
        except:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DateTimeFilter'
            )

        # use the period to generate all intervening dates
        try:
            # if exact_dates is not set and the period is n days, process differently:
            # essentially reset the start date to align with the period

            if (not self.exact_dates) and (self.period[-1:] == 'D'):

                start_year = start_date.strftime('%Y')
                start_day_of_year = int(start_date.strftime('%j'))
                period_num = int(self.period[:-1])
                # check if start day aligns with period
                if (start_day_of_year - 1) % period_num > 0:
                    new_start_day_of_year = math.ceil(
                        (start_day_of_year - 1) / period_num) * period_num + 1
                    start_date = pd.to_datetime(
                        '%d/%s' % (new_start_day_of_year, start_year),
                        format='%j/%Y')
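                    # worked example: with period '8D' and start 01/05 (day 5),
                    # (5-1) % 8 = 4 > 0, so the start is realigned to day
                    # ceil(4/8)*8 + 1 = 9, i.e. 01/09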

            if self.end is not None:
                all_dates = pd.date_range(start=start_date,
                                          end=end_date,
                                          freq=self.period)
            else:
                all_dates = [start_date]

            # convert back to string using the pattern
            for dt in all_dates:
                self.values.append(dt.strftime(self.pattern))

        except ValueError as e:
            raise GeoEDFError('Error applying DateTimeFilter : %s' % e)
        except:
            raise GeoEDFError('Unknown error applying DateTimeFilter')
Example #4
def getFileList(url, auth):
    if '*' in url:  #has wildcard
        # first get the base URL to get a listing of files
        partitioned = url.rpartition('/')
        base_url = partitioned[0]
        poss_filename = partitioned[2]
        # naive check whether poss_filename is indeed a file
        if '.' in poss_filename and '*' in poss_filename:
            filename_pattern = poss_filename
            try:
                # get a listing of files from the base_url
                session = SessionWithHeaderRedirection(auth['user'],
                                                       auth['password'])
                res = session.get(base_url)

                res.raise_for_status()

                # parse the returned HTML to get a possible file listing
                parser = HTMLHelper()
                parser.feed(res.text)
                files = parser.pathList

                result = []
                for filename in files:
                    # some filenames may be an absolute or relative path
                    if '/' in filename:
                        actual_filename = os.path.basename(filename)
                    else:
                        actual_filename = filename
                    if fnmatch.fnmatch(actual_filename, filename_pattern):
                        # if the path starts with a /, we need to derive the URL prefix; else we can just append
                        if filename.startswith('/'):
                            # get the URL prefix
                            if base_url.startswith('https://'):
                                skip = 8  # number of characters to skip in prefix
                            elif base_url.startswith('http://'):
                                skip = 7
                            else:
                                skip = 0

                            next_slash = base_url.find('/', skip)
                            if next_slash != -1:
                                url_prefix = base_url[:next_slash]
                            else:
                                url_prefix = base_url
                            result.append('%s%s' % (url_prefix, filename))
                        else:
                            result.append('%s/%s' % (base_url, filename))
                return result
            except requests.exceptions.HTTPError:
                raise GeoEDFError('Error accessing file listing at URL')
            except:
                raise
        else:
            raise GeoEDFError('URL does not point to a file or set of files')
    else:
        return [url]
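A usage sketch for the wildcard expansion above; the URL and credentials are hypothetical:

    auth = {'user': 'nasa_user', 'password': 'nasa_pass'}
    urls = getFileList('https://data.example.org/MOD13Q1/*.hdf', auth)
    # urls is now a list of fully qualified file URLs matching *.hdf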
Example #5
    def get(self):

        # semantic checking of parameters
        # process dates
        try:
            startdate = pd.to_datetime(self.start_date,format='%m/%d/%Y')
            enddate = pd.to_datetime(self.end_date,format='%m/%d/%Y')
        except:
            raise GeoEDFError("Error parsing dates provided to GHCNDInput, please ensure format is mm/dd/YYYY")
            
        # param checks complete
        try:
            # parse out station_id
            station_id = self.station_id.split(':')[1]
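            # e.g. (hypothetical) self.station_id = 'GHCND:USW00094846'
            # yields station_id = 'USW00094846' for the NCEI access API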
            
            # use the new NCEI access API
            # construct URL using the parsed start and end dates
            station_data_url = "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=SNOW,PRCP,SNWD,TMIN,TMAX&stations=%s&startDate=%s&endDate=%s&format=json" % (station_id, startdate.strftime('%Y-%m-%d'), enddate.strftime('%Y-%m-%d'))
            
            res = requests.get(station_data_url)
            res.raise_for_status()
            
            station_data = pd.read_json(res.text)

            # first reindex data by date
            station_data.set_index(pd.to_datetime(station_data['DATE']), inplace=True)

        except:
            print("Error fetching GHCND data for station %s in GHCNDInput" % self.station_id)
            return
            
        # for each of the five params, first check if we have sufficient data
        # then write out to CSV file
        for met_param in self.met_params:
            try:
                if met_param == 'PRCP' or met_param == 'TMAX' or met_param == 'TMIN':
                    if met_param in station_data:
                        num_nan = station_data[met_param].isna().sum()
                        if num_nan < 365: # then we are fine
                            # write out csv file
                            param_csvfile = '%s/%s_%s.csv' % (self.target_path,self.station_id,met_param)
                            param_data = station_data.filter([met_param])
                            param_data.to_csv(param_csvfile)
                # check for snow params
                if met_param == 'SNOW' or met_param == 'SNWD':
                    if met_param in station_data:
                        num_nan = station_data[met_param].isna().sum()
                        if num_nan < 3500:
                            # write out csv file
                            param_csvfile = '%s/%s_%s.csv' % (self.target_path,self.station_id,met_param)
                            param_data = station_data.filter([met_param])
                            param_data.to_csv(param_csvfile)
            except:
                raise GeoEDFError("Error occurred while writing out %s data to CSV for station %s in GHCNDInput" % (met_param,self.station_id))
Example #6
    def filter(self):

        # first load up the shapefile to determine its projection
        driver = ogr.GetDriverByName('ESRI Shapefile')
        inDataset = driver.Open(self.shapefile, 0)
        if inDataset is None:
            raise GeoEDFError('Error opening shapefile %s in ShpExtentFilter' %
                              self.shapefile)
        inLayer = inDataset.GetLayer()
        try:
            inSpatialRef = inLayer.GetSpatialRef()
        except:
            raise GeoEDFError(
                'Error determining projection of input shapefile, cannot fetch extents in lat-lon'
            )

        # construct the desired output projection
        try:
            outSpatialRef = osr.SpatialReference()
            outSpatialRef.ImportFromEPSG(4326)
        except BaseException as e:
            raise GeoEDFError(
                'Error occurred when constructing target projection: %s' % e)

        try:
            # create Coordinate Transformation
            coordTransform = osr.CoordinateTransformation(
                inSpatialRef, outSpatialRef)

            # get layer extent
            inExtent = inLayer.GetExtent()

            # extent is in the format: xmin,xmax,ymin,ymax

            # construct the point geometry for both bottom left and top right
            # then reproject
            bottomLeft = ogr.Geometry(ogr.wkbPoint)
            bottomLeft.AddPoint(inExtent[0], inExtent[2])

            topRight = ogr.Geometry(ogr.wkbPoint)
            topRight.AddPoint(inExtent[1], inExtent[3])

            bottomLeft.Transform(coordTransform)
            topRight.Transform(coordTransform)

            self.values.append('%f,%f,%f,%f' %
                               (bottomLeft.GetY(), topRight.GetY(),
                                bottomLeft.GetX(), topRight.GetX()))

        except:
            raise GeoEDFError(
                "Error occurred when trying to reproject extents")
Example #7
    def __init__(self, **kwargs):

        #list to hold all parameter names
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError(
                    'Required parameter %s for SimpleDataClean not provided' %
                    param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self, key, kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self, key, kwargs.get(key, None))

        # fetch static reg, crop, and livestock set CSVs that are packaged with processor
        # if no overrides have been provided
        # look in setup.py data_files for location where these have been placed
        if self.regsets_csv is None:
            self.regsets_csv = '/usr/local/data/reg_sets.csv'
        if self.cropsets_csv is None:
            self.cropsets_csv = '/usr/local/data/crop_sets.csv'
        if self.livestocksets_csv is None:
            self.livestocksets_csv = '/usr/local/data/livestock_sets.csv'

        # also fetch the static region maps csv; this file is always packaged with the processor
        self.regmaps_csv = '/usr/local/data/reg_map.csv'

        # finally, the R script that needs to be executed
        # this is stored at /usr/local/bin
        self.data_clean_script = '/usr/local/bin/01_data_clean.r'

        # validate start and end years
        try:
            if int(self.start_year) > int(self.end_year):
                raise GeoEDFError(
                    'start_year must be smaller than end_year in SimpleDataClean'
                )
        except (ValueError, TypeError):
            raise GeoEDFError(
                'Error occurred when validating start_year and end_year for SimpleDataClean; make sure they are integers'
            )

        # super class init
        super().__init__()
Example #8
def HDF_proj_WKT(hdf_filepath):
    # returns the projection of the HDF file in Well Known Text (WKT) format

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        # for HDF4 assume corner coordinates are stored in the StructMetadata.0 section
        hdf_file = SD(hdf_filepath, SDC.READ)

        try:
            # access grid metadata section of StructMetadata.0
            fattr = hdf_file.attributes(full=1)
            structmeta = fattr['StructMetadata.0']
            gridmeta = structmeta[0]

            # determine the projection GCTP code from the grid metadata
            proj_regex = re.compile(r'''Projection=(?P<projection>\w+)''',
                                    re.VERBOSE)
            match = proj_regex.search(gridmeta)
            proj = match.group('projection')

            # support MODIS sinusoidal projection for now, add others later
            if proj == 'GCTP_SNSOID':
                sinu_proj4 = "+proj=sinu +R=6371007.181 +nadgrids=@null +wktext"
                srs = osr.SpatialReference()
                srs.ImportFromProj4(sinu_proj4)
                return srs.ExportToWkt()
            else:
                # avoid implicitly returning None for unsupported projections
                raise GeoEDFError('Unsupported HDF4 projection: %s' % proj)
        except GeoEDFError:
            raise
        except:
            raise GeoEDFError(
                'Error determining the projection or unsupported projection')

    else:  # HDF5 file; only SMAP files in EASE Grid 2.0 are supported at the moment
        hdf_file = h5py.File(hdf_filepath, mode='r')

        # check to see if this is a EASE Grid 2.0 file
        if 'EASE2_global_projection' in hdf_file.keys():
            ease_proj4 = "+proj=cea +lat_0=0 +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m"
            srs = osr.SpatialReference()
            srs.ImportFromProj4(ease_proj4)
            return srs.ExportToWkt()
        else:
            raise GeoEDFError(
                'Error determining the projection or unsupported projection')
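A usage sketch, with a hypothetical MODIS granule path:

    # MODIS HDF4 granules report GCTP_SNSOID and yield sinusoidal-projection WKT
    wkt = HDF_proj_WKT('/data/MOD13Q1.A2020001.h10v05.006.hdf')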
Example #9
    def __init__(self, **kwargs):

        # list to hold all the parameter names; will be accessed in super to
        # construct dependency graph
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError(
                    'Required parameter %s for OpenDAPFilter not provided' %
                    param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self, key, kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self, key, kwargs.get(key, None))

        # initialize filter values array
        self.values = []

        # call super class init
        super().__init__()
Example #10
    def __init__(self, **kwargs):

        # list to hold all the parameter names; will be accessed in super to 
        # construct dependency graph
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError('Required parameter %s for GHCNDInput not provided' % param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self,key,kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self,key,kwargs.get(key,None))
            
        # set the hardcoded set of meteorological params
        # can possibly generalize to fetch any list of params in the future
        self.met_params = ['SNOW','SNWD','TMAX','TMIN','PRCP']

        # call super class init
        super().__init__()
Example #11
    def process(self):

        # the R script is invoked with the following command line arguments:
        # 1. start year
        # 2. end year
        # 3. input directory where FAO files are stored
        # 4. output directory
        # 5. region map csv path
        # 6. region sets csv path
        # 7. crop sets csv path
        # 8. livestock sets csv path

        try:
            command = "Rscript"
            args = [
                str(self.start_year),
                str(self.end_year), self.fao_input_dir, self.target_path,
                self.regmaps_csv, self.regsets_csv, self.cropsets_csv,
                self.livestocksets_csv
            ]

            cmd = [command, self.data_clean_script] + args

            stdout = subprocess.check_output(cmd, universal_newlines=True)

        except subprocess.CalledProcessError as e:
            # stdout is unbound when check_output raises; use e.output instead
            raise GeoEDFError(
                'Error occurred when running SimpleDataClean processor: %s' %
                e.output)
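An equivalent invocation sketched with subprocess.run (Python 3.7+), which also captures stderr for the error message:

    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    stdout = result.stdout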
Example #12
    def __init__(self, **kwargs):

        # list to hold all the parameter names; will be accessed in super to
        # construct dependency graph
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError(
                    'Required parameter %s for WQPInput not provided' % param)

        # specific check for conditional required params

        # set all required parameters
        for key in self.__required_params:
            setattr(self, key, kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self, key, kwargs.get(key, None))

        # set defaults if none provided
        if (self.start_date is None):
            self.start_date = ''
        if (self.end_date is None):
            self.end_date = '05-01-2020'

        # call super class init
        super().__init__()
Example #13
    def __init__(self, **kwargs):

        # list to hold all param names
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError(
                    'Required parameter %s for ReprojectShapefile not provided'
                    % param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self, key, kwargs.get(key))

        proj_params = ['prjfile', 'prjepsg', 'prjwkt']

        # make sure exactly one of the projection params has been provided
        if len(set(kwargs.keys()).intersection(set(proj_params))) != 1:
            raise GeoEDFError(
                'Exactly one among the target projection file, EPSG code, or Well Known Text (WKT) is required'
            )

        # set optional parameters
        for key in self.__optional_params:
            # special error handling of the newname parameter; needs to be a filename
            if key == 'newname':
                val = kwargs.get(key, None)
                if val is not None:
                    if os.path.basename(val) != val:
                        raise GeoEDFError(
                            'The value of the newname parameter needs to be a filename and not a path'
                        )
                    else:
                        # make sure it has a .shp extension
                        if os.path.splitext(val)[1] != '.shp':
                            raise GeoEDFError(
                                'newname must have a .shp extension')
                # set the value
                setattr(self, key, val)
                continue

            # if key is not provided in optional arguments, default its value to None
            setattr(self, key, kwargs.get(key, None))

        super().__init__()
Example #14
def HDF_type(hdf_filepath):

    # determine if HDF4 or HDF5 (for now based on file extension alone)
    try:
        (ignore, hdf_filename) = os.path.split(hdf_filepath)
        (ignore, extension) = os.path.splitext(hdf_filename)
        if extension == '.hdf':
            hdftype = 'hdf4'
        elif extension == '.h5':
            hdftype = 'hdf5'
        else:
            raise GeoEDFError(
                'Could not determine HDF file type from file extension')
    except:
        raise GeoEDFError('Could not determine HDF file type')

    return hdftype
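For example:

    HDF_type('/data/granule.hdf')  # returns 'hdf4'
    HDF_type('/data/granule.h5')   # returns 'hdf5'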
Example #15
    def get(self):

        # user provided scenarios to download
        user_scenarios = self.scenarios.split(',')

        # loop through scenarios available for this dam and download those that match the provided
        # scenario names
        r = requests.get(
            "https://fim.sec.usace.army.mil/ci/fim/getEAPLayers?id=" +
            self.dam_id)
        dam_scenarios = json.loads(r.content)
        for scenario in dam_scenarios:
            for user_scenario in user_scenarios:
                if user_scenario in scenario['displayName']:
                    # then download
                    link = "https://fim.sec.usace.army.mil/ci/download/start?LAYERID="\
                    + str(scenario["layerId"])\
                    + "&type=s3&RASTER_INFO_ID=" + str(scenario["rasterInfoID"])\
                    + "&TABLE=FLOOD_DEPTH&TABLE_ID=" + str(scenario["floodDepthID"])

                    #construct filename out of load and breach condition
                    fileName = '%s/%s_%s_%s.tiff' % (
                        self.target_path, scenario['loadCondition'],
                        scenario['breachCondition'], self.dam_id)
                    # download file; note that urlretrieve raises urllib.error
                    # exceptions rather than requests.exceptions
                    try:
                        urllib.request.urlretrieve(link, fileName)
                    except urllib.error.HTTPError:
                        raise GeoEDFError("DamFIMInput for %s - HTTPError" %
                                          self.dam_id)
                    except urllib.error.URLError:
                        raise GeoEDFError("DamFIMInput for %s - URLError" %
                                          self.dam_id)
        return True
Example #16
    def process(self):
        
        # first read the CSV file to fetch the regions and their corresponding values
        regions = []
        vals = []

        # name of the data field
        data_key = None
        
        with open(self.csvfile,'r') as csvFileObj:
            reader = csv.DictReader(csvFileObj)
            for row in reader:
                # pre-process step only required once
                # determine the name of the data field
                if data_key is None:
                    if len(list(row.keys())) != 2:
                        raise GeoEDFError("Error in CSV2HAR when processing %s. Exactly two fields are required" % self.csvfile)
                    else:
                        # REG is one, what is the other?
                        for key in row.keys():
                            if key != 'REG':
                                data_key = key
                                break
                regions.append(row['REG'])
                vals.append(row[data_key])

        # now build the HAR file header
        # first create the HAR file object
        (ignore, csvFilename) = os.path.split(self.csvfile)
        basename = os.path.splitext(csvFilename)[0]
        harFilename = '%s/%s.har' % (self.target_path,basename)
        harFile = HarFileObj(harFilename)

        # create the two header array objects and set them to the file
        
        # first the region header
        # in this header, region names are always padded to 12 characters long
        reg_arr = np.array([reg.ljust(12) for reg in regions],dtype='<U12')
        reg_setNames = ['REG']
        reg_setElements = [[reg.ljust(12) for reg in regions]]
        reg_coeff_name = ''.ljust(12)
        reg_long_name = 'Set REG inferred from CSV file'.ljust(70)
        reg_header = HeaderArrayObj.HeaderArrayFromData(reg_arr,reg_coeff_name,reg_long_name,reg_setNames,dict(zip(reg_setNames,reg_setElements)))
        # add header to HAR file
        harFile["SET1"] = reg_header

        # then the csv data header
        csv_arr = np.array(vals,dtype='float32')
        csv_setNames = ['REG']
        csv_setElements = [regions]
        csv_coeff_name = 'CSVData'.ljust(12)
        csv_long_name = 'Array extracted from CSV'.ljust(70)
        csv_header = HeaderArrayObj.HeaderArrayFromData(csv_arr,csv_coeff_name,csv_long_name,csv_setNames,dict(zip(csv_setNames,csv_setElements)))
        harFile["CSV"] = csv_header

        # write out the HAR file
        harFile.writeToDisk()
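A hypothetical input CSV for this processor; exactly two columns are required, REG plus a single data field:

    REG,GDP
    USA,20.5
    CHN,14.3
    IND,2.9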
Example #17
    def __init__(self, **kwargs):

        # list to hold all the parameter names; will be accessed in super to
        # construct dependency graph
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError(
                    'Required parameter %s for DateTimeFilter not provided' %
                    param)

        # specific check for conditionally required params
        # if end is provided, also need a period
        if 'end' in kwargs:
            if 'period' not in kwargs:
                raise GeoEDFError(
                    'Period is required for DateTimeFilter when both start and end are provided.'
                )

        # set all required parameters
        for key in self.__required_params:
            setattr(self, key, kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self, key, kwargs.get(key, None))
            # if has_time is not provided, set to False
            if key == 'has_time':
                if self.has_time is None:
                    self.has_time = False
            if key == 'exact_dates':
                if self.exact_dates is None:
                    self.exact_dates = False

        # initialize filter values array
        self.values = []

        # call super class init
        super().__init__()
Example #18
    def filter(self):

        # convert the start and end dates from strings to Pandas DateTime
        try:
            # check if time is present
            if self.has_time:
                start_date = pd.to_datetime(self.start,
                                            format='%m/%d/%Y %H:%M:%S')
            else:
                start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
            if self.end is not None:
                if self.has_time:
                    end_date = pd.to_datetime(self.end,
                                              format='%m/%d/%Y %H:%M:%S')
                else:
                    end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
        except ValueError as e:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DateTimeFilter : %s'
                % e)
        except:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DateTimeFilter'
            )

        # use the period to generate all intervening dates
        try:
            if self.end is not None:
                all_dates = pd.date_range(start=start_date,
                                          end=end_date,
                                          freq=self.period)
            else:
                all_dates = [start_date]

            # convert back to string using the pattern
            for dt in all_dates:
                self.values.append(dt.strftime(self.pattern))

        except ValueError as e:
            raise GeoEDFError('Error applying DateTimeFilter : %s' % e)
        except:
            raise GeoEDFError('Unknown error applying DateTimeFilter')
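For example, pd.date_range(start='2020-01-01', end='2020-01-31', freq='7D') yields January 1, 8, 15, 22, and 29, each of which is then formatted with the filter's pattern.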
Example #19
def getFile(url, path=None):
    """ download file(s) at url and save to path
	if path is None, save to /tmp
	returns boolean result
    """

    # validate that URL is not null
    if url is None:
        raise GeoEDFError('Null URL provided for getFile')

    # default path to /tmp
    if path is None:
        path = '/tmp'

    try:
        # if there is a wildcard in the URL, we need to process a list of files instead
        if '*' in url:
            fileURLList = getFileList(url)
            for fileURL in fileURLList:
                res = requests.get(fileURL, stream=True)
                res.raise_for_status()

                # get the name of the file to save
                outFilename = getFilename(res, fileURL)
                outPath = '%s/%s' % (path, outFilename.strip('"'))
                with open(outPath, 'wb') as outFile:
                    for chunk in res.iter_content(chunk_size=1024 * 1024):
                        outFile.write(chunk)
            return True
        else:  # no wildcard
            res = requests.get(url, stream=True)
            res.raise_for_status()

            # get the name of the file to save
            outFilename = getFilename(res, url)
            outPath = '%s/%s' % (path, outFilename.strip('"'))
            with open(outPath, 'wb') as outFile:
                for chunk in res.iter_content(chunk_size=1024 * 1024):
                    outFile.write(chunk)
            return True

    except GeoEDFError:  # known error
        raise
    except requests.exceptions.HTTPError:
        raise
Example #20
def constructSpatialRef(prj_file=None, prj_epsg_code=None, prj_wkt=None):

    outSpatialRef = osr.SpatialReference()
    # if projection file provided, read the WKT
    if prj_file is not None:
        with open(prj_file, 'r') as prjfile:
            prj_txt = prjfile.read()
        outSpatialRef.ImportFromESRI([prj_txt])
    elif prj_epsg_code is not None:
        if prj_epsg_code.isdigit():
            outSpatialRef.ImportFromEPSG(int(prj_epsg_code))
        else:
            # avoid silently returning an empty reference for bad EPSG codes
            raise GeoEDFError('EPSG code must be a numeric string')
    elif prj_wkt is not None:
        outSpatialRef.ImportFromESRI([prj_wkt])
    else:
        raise GeoEDFError('Non-null target projection file, EPSG code, or WKT is required')
    return outSpatialRef
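For example, to build a WGS84 lat-lon reference; note that the EPSG code is passed as a string:

    srs = constructSpatialRef(prj_epsg_code='4326')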
Example #21
    def __init__(self, **kwargs):

        #list to hold all parameter names
        self.provided_params = self.__required_params + self.__optional_params

        # check that all required params have been provided
        for param in self.__required_params:
            if param not in kwargs:
                raise GeoEDFError('Required parameter %s for CSV2HAR not provided' % param)

        # set all required parameters
        for key in self.__required_params:
            setattr(self,key,kwargs.get(key))

        # set optional parameters
        for key in self.__optional_params:
            # if key is not provided in optional arguments, default its value to None
            setattr(self,key,kwargs.get(key,None))

        super().__init__()
Example #22
    def filter(self):

        try:
            # construct the catalog URL and attempt to retrieve it using requests

            catalog_url = '%s/catalog.xml' % self.opendap_url

            res = requests.get(catalog_url, stream=True)
            res.raise_for_status()

            # temporarily save catalog file to directory holding eventual filter output

            outFilename = '%s/catalog.xml' % os.path.dirname(self.target_path)

            with open(outFilename, 'wb') as catalogFile:
                for chunk in res.iter_content(chunk_size=1024):
                    catalogFile.write(chunk)

            # parse catalog XML file
            tree = ET.parse(outFilename)

            root = tree.getroot()

            # assuming fixed format and namespaces
            # root > dataset > dataset array > access leaf

            # construct tag keys
            dataset_key = '%sdataset' % self.thredds_ns
            access_key = '%saccess' % self.thredds_ns
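            # assumption: thredds_ns is an ElementTree-style namespace prefix,
            # e.g. '{http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0}'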

            for child in root.findall(dataset_key):
                for children in child.findall(dataset_key):
                    for access_child in children.findall(access_key):
                        if access_child.attrib['serviceName'] == 'dap':
                            dataset_path = access_child.attrib['urlPath']
                            filename = os.path.split(dataset_path)[1]
                            # construct direct access URL for NetCDF4 format
                            self.values.append('%s/%s.nc4' %
                                               (self.opendap_url, filename))
        except:
            raise GeoEDFError('Unknown error applying OpenDAPFilter')
Example #23
def getFile(url, auth=None, path=None):
    """ download file(s) at url and save to path
	if path is None, save to /tmp
	auth is an optional dictionary with user and password
	returns boolean result
    """

    # validate that URL is not null
    if url is None:
        raise GeoEDFError('Null URL provided for getFile')

    # default path to /tmp
    if path is None:
        path = '/tmp'

    # auth is required for NASA data; raise an error if it is
    # missing, insufficient, or incorrect
    try:
        if auth is None:
            raise GeoEDFError(
                'Authentication required for accessing NASA data')
        else:

            if validateAuth(auth):  # auth validated for completeness
                session = SessionWithHeaderRedirection(auth['user'],
                                                       auth['password'])
                # if there is a wildcard in the URL, we need to process a list of files instead
                if '*' in url:
                    fileURLList = getFileList(url, auth)
                    # recreate session object since file listing may not need auth
                    session = SessionWithHeaderRedirection(
                        auth['user'], auth['password'])
                    for fileURL in fileURLList:
                        res = session.get(fileURL, stream=True)
                        res.raise_for_status()

                        # get the name of the file to save
                        outFilename = getFilename(res, fileURL)
                        outPath = '%s/%s' % (path, outFilename)
                        with open(outPath, 'wb') as outFile:
                            for chunk in res.iter_content(chunk_size=1024 *
                                                          1024):
                                outFile.write(chunk)
                    return True
                else:  # no wildcard
                    res = session.get(url)
                    res.raise_for_status()

                    # get the name of the file to save
                    outFilename = getFilename(res, url)
                    outPath = '%s/%s' % (path, outFilename)
                    with open(outPath, 'wb') as outFile:
                        for chunk in res.iter_content(chunk_size=1024 * 1024):
                            outFile.write(chunk)
                    return True

            else:  # auth could not be validated
                raise GeoEDFError('Invalid authentication provided!')

    except GeoEDFError:  # known error
        raise
    except requests.exceptions.HTTPError:
        raise
Example #24
    def filter(self):

        # first transform comma separated gage IDs into a list of strings
        gage_ids = self.gages.rstrip().split(',')

        # since HF cannot handle a large number of station IDs, split into chunks of 100
        num_split = math.ceil(len(gage_ids) / 100)

        gage_id_chunks = np.array_split(gage_ids, num_split)
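        # e.g. 250 gage IDs -> num_split = 3 -> chunks of 84, 83, and 83 IDs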

        # semantic checks on params
        # Check (1) start and end date are dates and in right order
        try:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
            end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
        except ValueError as e:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DischargeDataFilter : %s'
                % e)
        except:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DischargeDataFilter'
            )

        if start_date > end_date:
            raise GeoEDFError(
                'Start date cannot be later than end date in DischargeDataFilter')

        # make sure cutoff is an integer between 1 and 100
        try:
            self.cutoff = int(self.cutoff)
            if self.cutoff < 1 or self.cutoff > 100:
                raise GeoEDFError(
                    'Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100'
                )
        except:
            raise GeoEDFError(
                'Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100'
            )

        # next query Hydrofunctions for discharge data for the provided gages
        # 00060 is discharge parameter
        try:
            # process each chunk separately and merge the resulting dataframes
            # discharges holds the merged DF
            discharges = None
            for gage_chunk in gage_id_chunks:
                chunk_data = hf.NWIS(
                    list(gage_chunk),
                    'dv',
                    start_date=start_date.strftime('%Y-%m-%d'),
                    end_date=end_date.strftime('%Y-%m-%d'),
                    parameterCd='00060')
                if discharges is None:
                    discharges = chunk_data.df()
                else:
                    # simple merge
                    discharges = discharges.merge(chunk_data.df(),
                                                  how='outer',
                                                  left_index=True,
                                                  right_index=True)

            # get the statistics of retrieved data, we are looking for count
            # in order to filter by coverage %
            stn_data = discharges.describe()

            # maximum data available
            max_count = stn_data.loc['count'].max()

            # cutoff number of days
            count_cutoff = (max_count * self.cutoff) / 100

            # filter by availability
            keep_stn = (stn_data.loc['count'] >= count_cutoff)

            valid_stns = keep_stn[keep_stn].index.to_list()

            # clean up station IDs since the returned IDs have USGS:####:param format
            filtered_ids = list(
                map(lambda stn_id: stn_id.split(':')[1], valid_stns))

            # if any remain, set the return value to a comma separated list of these IDs
            if len(filtered_ids) > 0:
                self.values.append(','.join(filtered_ids))
        except:
            raise GeoEDFError(
                "Error retrieving discharge data for gages in DischargeDataFilter"
            )
Example #25
    def filter(self):

        # semantic checks on params
        # Check (1) exactly four values need to be provided in extent
        extent_vals = list(map((lambda val: float(val)),self.extent.split(',')))

        if len(extent_vals) != 4:
            raise GeoEDFError('NOAAStationFilter requires a N,S,E,W string of floating point numbers as the extent')

        # Check (2) that lat and lon pairs are in the right order
        north = extent_vals[0]
        south = extent_vals[1]
        east = extent_vals[2]
        west = extent_vals[3]

        if south > north:
            raise GeoEDFError('please check the ordering of the south and north extents')
        
        if west > east:
            raise GeoEDFError('please check the ordering of the east and west extents')
            
        # passed semantic checks, prepare dict of extents for API
        extent_dict = {"north": north, "south": south, "east": east, "west": west}
        
        # process dates
        try:
            startdate = pd.to_datetime(self.start_date,format='%m/%d/%Y')
            enddate = pd.to_datetime(self.end_date,format='%m/%d/%Y')
        except:
            raise GeoEDFError("Error parsing dates provided to NOAAStationFiler, please ensure format is mm/dd/YYYY")
            
        # param checks complete
        try:
            # get a client for NCDC API usage
            cdo_client = Client(self.token, default_units="None", default_limit=1000)

            # we are looking for stations with GHCND data
            # The find_stations function returns a dataframe of stations within the input extent
            stations = cdo_client.find_stations(
                            datasetid="GHCND",
                            extent=extent_dict,
                            startdate=startdate,
                            enddate=enddate,
                            return_dataframe=True)
            
            # filter to only retain stations which have sufficient data for the date range
            stations_to_drop = []
            # Drop stations without enough observations for the given date range
            for i in range(len(stations.maxdate)):
                # get max and min date of each station
                station_maxdate = pd.to_datetime(stations.maxdate[i],format='%Y-%m-%d')
                station_mindate = pd.to_datetime(stations.mindate[i],format='%Y-%m-%d')
                # check if station's maxdate is earlier than enddate
                if station_maxdate < enddate:
                    stations_to_drop.append(i)
                # check if station's mindate is later than startdate
                if station_mindate > startdate:
                    stations_to_drop.append(i)
                    
            # delete stations without enough time length
            valid_stations = stations.drop(stations.index[stations_to_drop])
            
            # add station IDs to values array
            self.values += list(valid_stations.id)
                
        except:
            raise GeoEDFError('Error occurred when querying NCDC API for stations in NOAAStationFilter')
Example #26
    def process(self):

        # first reproject the shapefile to WGS84; all processing will happen in lat-lon
        # use the ReprojectShapefile processor

        # Set the name of this new shapefile based on the HDF filename
        (ignore, hdffilename) = os.path.split(self.hdffile)
        tmpfilename = '%s.shp' % hdffilename

        # reproject shapefile
        try:
            # first get the HDF file's native projection
            #hdf_proj_wkt = HDFEOSHelper.HDF_proj_WKT(self.hdffile)
            shapefileReprojector = ReprojectShapefile(shapefile=self.shapefile,
                                                      prjepsg='4326',
                                                      newname=tmpfilename)
            shapefileReprojector.target_path = self.target_path
            #shapefileReprojector = ReprojectShapefile(shapefile=self.shapefile,destdir=self.destdir,prjwkt=hdf_proj_wkt,newname=tmpfilename)
            shapefileReprojector.process()
            shapefile_wgs84 = '%s/%s' % (self.target_path, tmpfilename)
        except:
            raise GeoEDFError(
                'Error reprojecting input shapefile, cannot proceed with masking HDF data'
            )

        # now process the HDF file's subdatasets
        # get the data matrix for the selected subdatasets
        hdf_data = HDFEOSHelper.HDF_subdataset_data(self.hdffile,
                                                    self.datasets)

        # get the lat-lon for the corner coordinates
        #(upperLeftX, upperLeftY, lowerRightX, lowerRightY) = HDFEOSHelper.HDF_corner_coords(self.hdffile)
        (upperLeftX, upperLeftY, lowerRightX, lowerRightY) = (-180, 90, 180,
                                                              -90)

        # get the grid dimensions of the data
        hdf_sample_data = next(iter(hdf_data.values()))['data']
        num_rows = hdf_sample_data.shape[0]
        num_cols = hdf_sample_data.shape[1]

        #print(num_rows,num_cols)

        # determine area of a single grid cell; assume equal size grids
        grid_cell_width = (lowerRightX - upperLeftX) / num_cols
        grid_cell_height = (upperLeftY - lowerRightY) / num_rows
        grid_cell_rect = ogr.Geometry(ogr.wkbLinearRing)
        grid_cell_rect.AddPoint(upperLeftX, upperLeftY)
        grid_cell_rect.AddPoint(upperLeftX + grid_cell_width, upperLeftY)
        grid_cell_rect.AddPoint(upperLeftX + grid_cell_width,
                                upperLeftY - grid_cell_height)
        grid_cell_rect.AddPoint(upperLeftX, upperLeftY - grid_cell_height)
        grid_cell_rect.AddPoint(upperLeftX, upperLeftY)
        grid_cell_geom = ogr.Geometry(ogr.wkbPolygon)
        grid_cell_geom.AddGeometry(grid_cell_rect)
        grid_cell_area = grid_cell_geom.Area()

        shp_driver = ogr.GetDriverByName("ESRI Shapefile")
        mask_shp_data_source = shp_driver.Open(shapefile_wgs84, 1)
        mask_shp_layer = mask_shp_data_source.GetLayer()
        #print(mask_shp_layer.GetExtent())

        # add new fields to store the aggregate value for each subdataset
        for key in hdf_data.keys():
            # dbfs only allow for field names up to 10 characters long
            key_10char = key[0:10]
            mask_shp_layer.CreateField(ogr.FieldDefn(key_10char, ogr.OFTReal))

        # loop through shapefile features, determining different subdataset aggregate value for each
        for mask_shp_feature in mask_shp_layer:
            mask_shp_feature_geom = mask_shp_feature.GetGeometryRef()
            mask_shp_feature_area = mask_shp_feature_geom.Area()

            # initialize dictionary of aggregate data for each hdf subdataset for the current feature
            feature_hdf_data = dict()
            for key in hdf_data.keys():
                feature_hdf_data[key] = 0.0

            # factor to weigh aggregate data by based on intersection areas with each grid cell
            feature_weight = 0.0

            # get the bounds of the feature
            mask_shp_feature_geom.FlattenTo2D()
            x_min, x_max, y_min, y_max = mask_shp_feature_geom.GetEnvelope()

            # optimization to only process intersecting rows and columns rather than all grids
            j_low = max(0, int((x_min - upperLeftX) / grid_cell_width) - 1)
            j_high = min(num_cols,
                         int((x_max - upperLeftX) / grid_cell_width) + 1)
            i_low = max(0, int((upperLeftY - y_max) / grid_cell_height) - 1)
            i_high = min(num_rows,
                         int((upperLeftY - y_min) / grid_cell_height) + 1)

            num_cells = 0
            num_cells_0 = 0
            num_cells_1 = 0
            num_cells_partial = 0

            # loop through grid cells, checking for intersection with feature and aggregating
            # weighted value for each subdataset
            for i in range(i_low, i_high):
                # further optimize by determining the subset of columns that are relevant for this row
                row_rect = ogr.Geometry(ogr.wkbLinearRing)
                row_rect.AddPoint(upperLeftX + j_low * grid_cell_width,
                                  upperLeftY - i * grid_cell_height)
                row_rect.AddPoint(upperLeftX + (j_high + 1) * grid_cell_width,
                                  upperLeftY - i * grid_cell_height)
                row_rect.AddPoint(upperLeftX + (j_high + 1) * grid_cell_width,
                                  upperLeftY - (i + 1) * grid_cell_height)
                row_rect.AddPoint(upperLeftX + j_low * grid_cell_width,
                                  upperLeftY - (i + 1) * grid_cell_height)
                row_rect.AddPoint(upperLeftX + j_low * grid_cell_width,
                                  upperLeftY - i * grid_cell_height)
                row_geom = ogr.Geometry(ogr.wkbPolygon)
                row_geom.AddGeometry(row_rect)

                row_intersection_geom = row_geom.Intersection(
                    mask_shp_feature_geom)
                # guard against a None intersection before computing its area
                if row_intersection_geom is not None and row_intersection_geom.Area() > 0.0:
                    row_x_min, row_x_max, row_y_min, row_y_max = \
                        row_intersection_geom.GetEnvelope()
                    new_j_low = max(
                        0,
                        int((row_x_min - upperLeftX) / grid_cell_width) - 1)
                    new_j_high = min(
                        num_cols,
                        int((row_x_max - upperLeftX) / grid_cell_width) + 1)
                else:
                    row_intersection_geom = mask_shp_feature_geom
                    new_j_low = j_low
                    new_j_high = j_high

                for j in range(new_j_low, new_j_high):

                    num_cells = num_cells + 1

                    # get the cell value
                    # note: key here is the last subdataset key from the loop
                    # above; TODO: perform the validity check per subdataset
                    cell_val = hdf_data[key]['data'][i][j]

                    # skip cells that contain the "nodata" value
                    if 'fillValue' in hdf_data[key]:
                        fillValue = hdf_data[key]['fillValue']
                        if cell_val == fillValue or cell_val == 0 - fillValue:
                            continue

                    # construct a grid cell based on the lat-lon values for this grid row and column
                    cell_rect = ogr.Geometry(ogr.wkbLinearRing)
                    cell_rect.AddPoint(upperLeftX + j * grid_cell_width,
                                       upperLeftY - i * grid_cell_height)
                    cell_rect.AddPoint(upperLeftX + (j + 1) * grid_cell_width,
                                       upperLeftY - i * grid_cell_height)
                    cell_rect.AddPoint(upperLeftX + (j + 1) * grid_cell_width,
                                       upperLeftY - (i + 1) * grid_cell_height)
                    cell_rect.AddPoint(upperLeftX + j * grid_cell_width,
                                       upperLeftY - (i + 1) * grid_cell_height)
                    cell_rect.AddPoint(upperLeftX + j * grid_cell_width,
                                       upperLeftY - i * grid_cell_height)
                    cell_geom = ogr.Geometry(ogr.wkbPolygon)
                    cell_geom.AddGeometry(cell_rect)

                    # check to see if the grid cell intersects the column intersection geometry
                    # get the overlap area to weight the aggregation calculation
                    if (cell_geom.Disjoint(row_intersection_geom)
                        ):  # the geometries are disjoint
                        num_cells_0 = num_cells_0 + 1
                        cell_intersection_area = 0.0
                    elif (cell_geom.Within(row_intersection_geom)
                          ):  # grid cell is fully contained
                        num_cells_1 = num_cells_1 + 1
                        cell_intersection_area = grid_cell_area
                    else:
                        num_cells_partial = num_cells_partial + 1
                        cell_intersection_geom = cell_geom.Intersection(
                            row_intersection_geom
                        )  # grid cell intersects feature
                        if cell_intersection_geom is not None:
                            cell_intersection_area = cell_intersection_geom.Area()
                        else:
                            cell_intersection_area = 0.0

                    # grid cell does not intersect the feature
                    if (cell_intersection_area <= 0.0):
                        continue

                    # add the weighted contribution of this grid cell to the feature value for each subdataset
                    for key in hdf_data.keys():
                        feature_hdf_data[key] += hdf_data[key]['data'][i][
                            j] * cell_intersection_area / grid_cell_area

                    feature_weight += cell_intersection_area / grid_cell_area

            # done with loop over grid cells, compute actual weighted aggregate value for feature
            for key in hdf_data.keys():
                key_10char = key[0:10]
                if feature_weight > 0.0:
                    feature_hdf_data[
                        key] = feature_hdf_data[key] / feature_weight
                else:
                    feature_hdf_data[key] = 0.0

                # set the value for this subdataset field on the feature
                mask_shp_feature.SetField(key_10char, feature_hdf_data[key])
                mask_shp_layer.SetFeature(mask_shp_feature)
                mask_shp_data_source.SyncToDisk()

        # close the result shapefile
        mask_shp_layer = None
        mask_shp_data_source.SyncToDisk()
        mask_shp_data_source = None
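In effect, each feature's value for a subdataset is the area-weighted mean sum(v_ij * a_ij) / sum(a_ij), where v_ij is the grid cell value and a_ij is the fraction of the cell's area that intersects the feature.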
Example #27
    def filter(self):

        # semantic checks on params
        # Check (1): exactly four comma-separated numeric values must be provided in extent
        try:
            extent_vals = list(
                map((lambda val: int(float(val))), self.extent.split(',')))
        except ValueError:
            raise GeoEDFError(
                'GeoRangeFilter requires numeric values in the latmin,latmax,lonmin,lonmax extent string'
            )

        if len(extent_vals) != 4:
            raise GeoEDFError(
                'GeoRangeFilter requires a latmin,latmax,lonmin,lonmax string as the extent'
            )

        # Check (2) that lat and lon pairs are in the right order
        self.latmin = extent_vals[0]
        self.latmax = extent_vals[1]
        self.lonmin = extent_vals[2]
        self.lonmax = extent_vals[3]

        if self.latmin > self.latmax:
            raise GeoEDFError(
                'extent[0] and extent[1] need to be the latmin and latmax; please check the ordering'
            )

        if self.lonmin > self.lonmax:
            raise GeoEDFError(
                'extent[2] and extent[3] need to be the lonmin and lonmax; please check the ordering'
            )

        try:
            # first produce all intermediate values for the lat and lon pairs
            # the range stop is max + 2: +1 because int(float()) truncates the decimal
            # portion (so the next tile up may still be needed), and +1 more because
            # range() excludes its stop value
            if self.latmax <= 0:
                # latmin is also <= 0 since we've checked the ordering
                # take absolute values, flip the range, and produce 's'-prefixed names
                lat_range = list(range(abs(self.latmax), abs(self.latmin) + 2))
                lat_vals = list(
                    map((lambda lat_val: 's%d' % lat_val), lat_range))
            else:  #latmax is > 0
                if self.latmin < 0:
                    # need to split into two ranges; up to 0 and then > 0
                    lat_range1 = list(range(0, abs(self.latmin) + 2))
                    lat_vals = list(
                        map((lambda lat_val: 's%d' % lat_val), lat_range1))

                    lat_range2 = list(range(0, self.latmax + 2))
                    lat_vals += list(
                        map((lambda lat_val: 'n%d' % lat_val), lat_range2))
                else:  #latmin is >= 0
                    lat_range = list(range(self.latmin, self.latmax + 2))
                    lat_vals = list(
                        map((lambda lat_val: 'n%d' % lat_val), lat_range))

            # process lon values
            if self.lonmax <= 0:
                # lonmin is also <= 0 since we've checked the ordering
                # take absolute values, flip the range, and produce 'w'-prefixed names
                lon_range = list(range(abs(self.lonmax), abs(self.lonmin) + 2))
                lon_vals = list(
                    map((lambda lon_val: 'w%03d' % lon_val), lon_range))
            else:  #lonmax is > 0
                if self.lonmin < 0:
                    # need to split into two ranges; up to 0 and then > 0
                    lon_range1 = list(range(0, abs(self.lonmin) + 2))
                    lon_vals = list(
                        map((lambda lon_val: 'w%03d' % lon_val), lon_range1))

                    lon_range2 = list(range(0, self.lonmax + 2))
                    lon_vals += list(
                        map((lambda lon_val: 'e%03d' % lon_val), lon_range2))
                else:  #lonmin is >= 0
                    lon_range = list(range(self.lonmin, self.lonmax + 2))
                    lon_vals = list(
                        map((lambda lon_val: 'e%03d' % lon_val), lon_range))

            # concatenate the lat and lon vals to produce a single string
            for lat_val in lat_vals:
                for lon_val in lon_vals:
                    self.values.append(lat_val + lon_val)

        except Exception as e:
            raise GeoEDFError(
                'Unknown error occurred when attempting to construct filter values: %s'
                % e)
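For reference, a standalone sketch of the tile-name construction above (the function name is illustrative; the logic mirrors the filter):

    def tile_names(latmin, latmax, lonmin, lonmax):
        # lat names: 's' prefix for southern tiles, 'n' for northern
        if latmax <= 0:
            lat_vals = ['s%d' % v for v in range(abs(latmax), abs(latmin) + 2)]
        elif latmin < 0:
            lat_vals = ['s%d' % v for v in range(0, abs(latmin) + 2)]
            lat_vals += ['n%d' % v for v in range(0, latmax + 2)]
        else:
            lat_vals = ['n%d' % v for v in range(latmin, latmax + 2)]

        # lon names: 'w'/'e' prefix, zero-padded to three digits
        if lonmax <= 0:
            lon_vals = ['w%03d' % v for v in range(abs(lonmax), abs(lonmin) + 2)]
        elif lonmin < 0:
            lon_vals = ['w%03d' % v for v in range(0, abs(lonmin) + 2)]
            lon_vals += ['e%03d' % v for v in range(0, lonmax + 2)]
        else:
            lon_vals = ['e%03d' % v for v in range(lonmin, lonmax + 2)]

        return [lat + lon for lat in lat_vals for lon in lon_vals]

    # tile_names(38, 39, -106, -105) yields the 9 names
    # 'n38w105' ... 'n40w107', covering the requested extent plus a margin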
Example #28
    def process(self):
        # set the reprojected file output name and path
        # if a new name has not been provided, reuse the source filename
        # since the output directory is always new, there won't be a clash
        if self.newname is not None:
            outfilename = self.newname
        else:
            (ignore, outfilename) = os.path.split(self.shapefile)

        (outfileshortname, extension) = os.path.splitext(outfilename)
        outfilepath = '%s/%s' % (self.target_path, outfilename)

        driver = ogr.GetDriverByName('ESRI Shapefile')
        indataset = driver.Open(self.shapefile, 0)
        if indataset is None:
            raise GeoEDFError(
                'Error opening shapefile %s in ReprojectShapefile processor' %
                self.shapefile)
        inlayer = indataset.GetLayer()
        inSpatialRef = inlayer.GetSpatialRef()
        # GetSpatialRef() returns None rather than raising when the projection is missing
        if inSpatialRef is None:
            raise GeoEDFError(
                'Error determining projection of input shapefile, cannot reproject'
            )

        # construct the desired output projection
        try:
            outSpatialRef = ProjectionHelper.constructSpatialRef(
                self.prjfile, self.prjepsg, self.prjwkt)
        except BaseException as e:
            raise GeoEDFError(
                'Error occurred when constructing target projection: %s' % e)

        # create Coordinate Transformation
        coordTransform = osr.CoordinateTransformation(inSpatialRef,
                                                      outSpatialRef)

        # Create the output shapefile
        outdataset = driver.CreateDataSource(outfilepath)
        if outdataset is None:
            raise GeoEDFError('Error creating reprojected shapefile %s' %
                              outfilepath)

        outlayer = outdataset.CreateLayer(outfileshortname,
                                          geom_type=inlayer.GetGeomType())

        # add fields
        inLayerDefn = inlayer.GetLayerDefn()
        for i in range(0, inLayerDefn.GetFieldCount()):
            fieldDefn = inLayerDefn.GetFieldDefn(i)
            outlayer.CreateField(fieldDefn)

        featureDefn = outlayer.GetLayerDefn()
        infeature = inlayer.GetNextFeature()
        while infeature:
            #get the input geometry
            geometry = infeature.GetGeometryRef()
            #reproject the geometry, each one has to be projected separately
            geometry.Transform(coordTransform)
            #create a new output feature
            outfeature = ogr.Feature(featureDefn)
            #set the geometry and attribute
            outfeature.SetGeometry(geometry)
            #set field values from input shapefile
            for i in range(0, featureDefn.GetFieldCount()):
                outfeature.SetField(featureDefn.GetFieldDefn(i).GetNameRef(),
                                    infeature.GetField(i))
            #add the feature to the output shapefile
            outlayer.CreateFeature(outfeature)
            #destroy the features and get the next input feature
            outfeature.Destroy()
            infeature.Destroy()
            infeature = inlayer.GetNextFeature()

        #close the shapefiles
        indataset.Destroy()
        outdataset.Destroy()

        #create the new prj projection file
        outSpatialRef.MorphToESRI()
        outPrjFileName = '%s/%s.prj' % (self.target_path, outfileshortname)
        outPrjFile = open(outPrjFileName, 'w')
        outPrjFile.write(outSpatialRef.ExportToWkt())
        outPrjFile.close()
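One portability note on the reprojection above: under GDAL 3+, OSR honors the authority-defined axis order, so transformations involving EPSG:4326 can silently swap coordinates relative to GDAL 2.x. A common guard (a sketch, assuming the GDAL 3+ Python bindings) is to force the traditional lon/lat order on both spatial references before building the transformation:

    from osgeo import osr

    # keep GDAL 2.x-style (lon, lat) axis order under GDAL 3+
    inSpatialRef.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    outSpatialRef.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    coordTransform = osr.CoordinateTransformation(inSpatialRef, outSpatialRef)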
Example #29
def HDF_subdataset_data(hdf_filepath, subdataset_substrs):

    # process the names of the subdatasets, finding any that contain a member of
    # subdataset_substrs as a substring
    # subdataset_substrs is a list

    # returned dictionary indexed by subdataset name
    # contains data grid and value range
    hdf_data = dict()

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        hdf_file = SD(hdf_filepath, SDC.READ)
        try:
            dset_names = hdf_file.datasets().keys()
            # loop through input subdataset substrings
            for subdset_substr in subdataset_substrs:
                # loop through datasets in HDF file
                for dset_name in dset_names:
                    # if substring found
                    if subdset_substr in dset_name:
                        # if this subdataset has not been processed before
                        if dset_name not in hdf_data:
                            try:
                                data2D = hdf_file.select(dset_name)
                                data = data2D[:, :].astype(np.float64)
                                hdf_data[dset_name] = dict()
                                hdf_data[dset_name]['data'] = data
                                #hdf_data[dset_name]['range'] = data2D.getrange()
                                hdf_data[dset_name]['fillValue'] = data2D.getfillvalue()
                            except:
                                raise GeoEDFError(
                                    'Error retrieving subdataset %s data from HDF file'
                                    % dset_name)
        except:
            raise GeoEDFError(
                'Error retrieving subdatasets from HDF4 file %s' %
                hdf_filepath)
    else:
        hdf_file = h5py.File(hdf_filepath, mode='r')
        # assume this follows the structure of SMAP HDF-EOS5 files, where all subdatasets are in a "Geophysical_Data" group
        if 'Geophysical_Data' in hdf_file.keys():
            dset_names = hdf_file['Geophysical_Data'].keys()
            # loop through input subdataset substrings
            for subdset_substr in subdataset_substrs:
                # loop through subdatasets in HDF file
                for dset_name in dset_names:
                    # if substring matches
                    if subdset_substr in dset_name:
                        # if subdataset not processed yet
                        if dset_name not in hdf_data:
                            try:
                                # construct fully qualified subdataset name
                                fq_dset_name = '/Geophysical_Data/%s' % dset_name
                                data = hdf_file[fq_dset_name]
                                hdf_data[dset_name] = dict()
                                hdf_data[dset_name]['data'] = data[:]
                                hdf_data[dset_name]['fillValue'] = data.fillvalue
                            except:
                                raise GeoEDFError(
                                    'Error retrieving subdataset %s data from HDF file'
                                    % dset_name)
        else:
            raise GeoEDFError(
                'Cannot handle HDF5 files that do not follow the HDF-EOS standards'
            )

    return hdf_data
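A minimal usage sketch (the file path and substring are illustrative):

    # select every subdataset whose name contains 'NDVI' from a MODIS HDF4 granule
    hdf_data = HDF_subdataset_data('/data/MOD13A2.A2020001.h10v04.hdf', ['NDVI'])
    for name, entry in hdf_data.items():
        grid = entry['data']        # 2D numpy array of float64 values
        fill = entry['fillValue']   # fill value to mask out before aggregating
        print(name, grid.shape, fill)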
Example #30
def HDF_corner_coords(hdf_filepath):

    # return a tuple of upper left and lower right corner coordinates
    # (in the grid's projected coordinate system, in meters; the commented-out
    # pyproj calls below would convert these to lat-lon)

    # first determine the HDF type
    hdf_type = HDF_type(hdf_filepath)

    if hdf_type == 'hdf4':
        # for HDF4 assume corner coordinates are stored in the StructMetadata.0 section
        hdf_file = SD(hdf_filepath, SDC.READ)

        try:
            # access grid metadata section of StructMetadata.0
            fattr = hdf_file.attributes(full=1)
            structmeta = fattr['StructMetadata.0']
            gridmeta = structmeta[0]

            # parse the text to retrieve corner coordinates in meters
            ul_regex = re.compile(
                r'''UpperLeftPointMtrs=\(
                                  (?P<upper_left_x>[+-]?\d+\.\d+)
                                  ,
                                  (?P<upper_left_y>[+-]?\d+\.\d+)
                                  \)''', re.VERBOSE)
            match = ul_regex.search(gridmeta)
            # use the builtin float; np.float was removed in NumPy 1.24
            x0 = float(match.group('upper_left_x'))
            y0 = float(match.group('upper_left_y'))

            lr_regex = re.compile(
                r'''LowerRightMtrs=\(
                                  (?P<lower_right_x>[+-]?\d+\.\d+)
                                  ,
                                  (?P<lower_right_y>[+-]?\d+\.\d+)
                                  \)''', re.VERBOSE)
            match = lr_regex.search(gridmeta)
            x1 = float(match.group('lower_right_x'))
            y1 = float(match.group('lower_right_y'))

            # construct the projection transformer to convert from meters to lat-lon

            # determine the projection GCTP code from the grid metadata
            proj_regex = re.compile(r'''Projection=(?P<projection>\w+)''',
                                    re.VERBOSE)
            match = proj_regex.search(gridmeta)
            proj = match.group('projection')

            # support MODIS sinusoidal projection for now, add others later
            if proj == 'GCTP_SNSOID':
                #sinu = pyproj.Proj("+proj=sinu +R=6371007.181 +nadgrids=@null +wktext")
                #wgs84 = pyproj.Proj("+init=EPSG:4326")
                #lon0, lat0 = pyproj.transform(sinu, wgs84, x0, y0)
                #lon1, lat1 = pyproj.transform(sinu, wgs84, x1, y1)

                #return (lon0, lat0, lon1, lat1)
                return (x0, y0, x1, y1)

            else:
                raise GeoEDFError(
                    'Only MODIS sinusoidal grids are supported currently')

        except Exception as e:
            #x0, y0, x1, y1 = -17357881.81713629,7324184.56362408,17357881.81713629,-7324184.56362408
            #return (x0,y0,x1,y1)
            raise GeoEDFError(
                'Error retrieving corner coordinates of HDF file: %s' % e)

    else:  # HDF5 file; only SMAP files in EASE Grid 2.0 are supported at the moment
        hdf_file = h5py.File(hdf_filepath, mode='r')

        # check to see if this is an EASE Grid 2.0 file
        if 'EASE2_global_projection' in hdf_file.keys():
            # hardcoded corner coordinates, since this is not stored in the file metadata
            x0, y0, x1, y1 = -17357881.81713629, 7324184.56362408, 17357881.81713629, -7324184.56362408

            #ease = pyproj.Proj(("+proj=cea +lat_0=0 +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m"))
            #wgs84 = pyproj.Proj("+init=EPSG:4326")
            #lon0, lat0 = pyproj.transform(ease, wgs84, x0, y0)
            #lon1, lat1 = pyproj.transform(ease, wgs84, x1, y1)

            #return (lon0, lat0, lon1, lat1)
            return (x0, y0, x1, y1)

        else:
            raise GeoEDFError(
                'Only EASE Grid 2.0 HDF5 files are supported currently')
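Since the commented-out pyproj.Proj('+init=...') / pyproj.transform calls above use an API deprecated since pyproj 2.x, here is a sketch of the equivalent conversion of the returned projected corners to lat-lon with the modern Transformer API; the CRS strings come from the comments above, and the file path is illustrative:

    from pyproj import Transformer

    # MODIS sinusoidal -> WGS84; always_xy keeps (x, y) / (lon, lat) ordering
    sinu_to_wgs84 = Transformer.from_crs(
        '+proj=sinu +R=6371007.181 +nadgrids=@null +wktext',
        'EPSG:4326', always_xy=True)

    x0, y0, x1, y1 = HDF_corner_coords('/data/MOD13A2.A2020001.h10v04.hdf')
    lon0, lat0 = sinu_to_wgs84.transform(x0, y0)
    lon1, lat1 = sinu_to_wgs84.transform(x1, y1)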