def _handle_polygon(self, polygon_elem):
        elem = extract_elem(polygon_elem, ['polygon', 'Polygon'])
        srs_name = elem.attrib.get('srsName', 'EPSG:4326')

        geom = gml_to_geom(elem)
        if srs_name != '':
            geom = reproject(geom, srs_name, 'EPSG:4326')

        # TODO: generate the envelope?
        return {"dc:spatial": to_wkt(geom)}
    def parse_item(self, elem):
        identifier = extract_item(self.elem, ['Entry_ID'])
        title = extract_item(self.elem, ['Entry_Title'])
        keywords = extract_items(self.elem, ['Keyword'])
        keywords += extract_items(self.elem, ['ISO_Topic_Category'])
        abstract = extract_item(self.elem, ['Summary'])
        organization = extract_item(self.elem, ['Originating_Center'])

        # temporal extent
        start_date = extract_item(self.elem,
                                  ['Temporal_Coverage', 'Start_Date'])
        end_date = extract_item(self.elem, ['Temporal_Coverage', 'End_Date'])
        temporal = [start_date, end_date] if start_date and end_date else []

        # spatial extent
        west = extract_item(self.elem,
                            ['Spatial_Coverage', 'Westernmost_Longitude'])
        east = extract_item(self.elem,
                            ['Spatial_Coverage', 'Easternmost_Longitude'])
        south = extract_item(self.elem,
                             ['Spatial_Coverage', 'Southernmost_Latitude'])
        north = extract_item(self.elem,
                             ['Spatial_Coverage', 'Northernmost_Latitude'])
        bbox = [west, south, east, north] if \
            west and east and north and south else []
        # only build a geometry when all four bounds were found
        bbox = to_wkt(bbox_to_geom(bbox)) if bbox else ''

        distributions = []
        for related_url in extract_elems(self.elem, ['Related_URL']):
            url = extract_item(related_url, ['URL'])
            content_type = extract_item(related_url,
                                        ['URL_Content_Type', 'Type'])
            description = extract_item(related_url, ['Description'])
            dist = tidy_dict({
                "url": url,
                "description": description,
                "content_type": content_type
            })
            if dist:
                distributions.append(dist)

        return tidy_dict({
            "id": identifier,
            "title": title,
            "keywords": keywords,
            "abstract": abstract,
            "organization": organization,
            "bbox": bbox,
            "temporal": temporal,
            "distributions": distributions
        })
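tidy_dict, used here both for individual distributions and for the returned record, is assumed to drop empty values so that an all-empty record collapses to a falsy dict; a sketch under that assumption (the real helper may differ):

def tidy_dict(items):
    # keep only keys with non-empty values; an empty result is falsy,
    # which is why `if dist:` above filters out empty distributions
    return dict((k, v) for k, v in items.items() if v)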
    def _handle_bbox(self, elem):
        west = extract_item(elem, ['westBoundLongitude', 'Decimal'])
        west = float(west) if west else 0

        east = extract_item(elem, ['eastBoundLongitude', 'Decimal'])
        east = float(east) if east else 0

        south = extract_item(elem, ['southBoundLatitude', 'Decimal'])
        south = float(south) if south else 0

        north = extract_item(elem, ['northBoundLatitude', 'Decimal'])
        north = float(north) if north else 0

        # note: a bound of exactly 0 (or one defaulted to 0 above) fails this
        # truthiness check and is treated as "no bbox"
        bbox = [west, south, east, north] \
            if east and west and north and south else []

        geom = bbox_to_geom(bbox) if bbox else None
        return {
            "dc:spatial": to_wkt(geom) if geom is not None else '',
            "esip:westBound": west,
            "esip:eastBound": east,
            "esip:southBound": south,
            "esip:northBound": north
        }
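bbox_to_geom is assumed to turn a [west, south, east, north] list into an OGR polygon; a sketch under that assumption (the real helper may differ):

from osgeo import ogr


def bbox_to_geom(bbox):
    # bbox is [west, south, east, north]; returns a closed OGR polygon
    west, south, east, north = [float(c) for c in bbox]
    ring = ogr.Geometry(ogr.wkbLinearRing)
    for x, y in [(west, south), (east, south), (east, north),
                 (west, north), (west, south)]:
        ring.AddPoint_2D(x, y)
    polygon = ogr.Geometry(ogr.wkbPolygon)
    polygon.AddGeometry(ring)
    return polygon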
    def parse_item(self):
        output = {}

        urls = set()

        catalog_object_id = generate_uuid_urn()

        output['catalog_record'] = {
            "object_id": catalog_object_id,
            "bcube:dateCreated": self.harvest_details.get('harvest_date', ''),
            "bcube:lastUpdated": self.harvest_details.get('harvest_date', ''),
            # "dc:conformsTo": extract_attrib(
            #     self.elem, ['@noNamespaceSchemaLocation']).split(),
            "rdf:type": "FGDC:CSDGM",
            "relationships": [],
            "urls": []
        }
        output['urls'] = []

        # add the harvest info
        # this is not necessary as a sha just for set inclusion
        url_sha = generate_sha_urn(self.url)
        urls.add(url_sha)
        original_url = self._generate_harvest_manifest(**{
            "bcube:hasUrlSource": "Harvested",
            "bcube:hasConfidence": "Good",
            "vcard:hasURL": self.url,
            "object_id": generate_uuid_urn(),
            "dc:identifier": url_sha
        })
        output['catalog_record']['urls'].append(original_url)
        # NOTE: this is not the sha from the url
        output['catalog_record']['relationships'].append(
            {
                "relate": "bcube:originatedFrom",
                "object_id": original_url['object_id']
            }
        )

        datsetid = extract_item(self.elem, ['idinfo', 'datsetid'])
        dataset_object_id = generate_uuid_urn()

        dataset = {
            "object_id": dataset_object_id,
            "dcterms:identifier": datsetid,
            "bcube:dateCreated": self.harvest_details.get('harvest_date', ''),
            "bcube:lastUpdated": self.harvest_details.get('harvest_date', ''),
            "dc:description": extract_item(
                self.elem, ['idinfo', 'descript', 'abstract']),
            "dcterms:title": extract_item(
                self.elem, ['idinfo', 'citation', 'citeinfo', 'title']),
            "urls": [],
            "relationships": []
        }

        bbox_elem = extract_elem(self.elem, ['idinfo', 'spdom', 'bounding'])
        if bbox_elem is not None:
            # that's not even valid
            west = extract_item(bbox_elem, ['westbc'])
            east = extract_item(bbox_elem, ['eastbc'])
            north = extract_item(bbox_elem, ['northbc'])
            south = extract_item(bbox_elem, ['southbc'])
            bbox = [west, south, east, north]
            bbox = bbox_to_geom(bbox)
            bbox = to_wkt(bbox)

            dataset.update({
                "dc:spatial": bbox,
                "esip:westBound": west,
                "esip:eastBound": east,
                "esip:northBound": north,
                "esip:southBound": south
            })

        time_elem = extract_elem(self.elem, ['idinfo', 'timeperd', 'timeinfo'])
        if time_elem is not None:
            caldate = extract_item(time_elem, ['sngdate', 'caldate'])
            if caldate:
                # TODO: we should see if it's at least a valid date
                dataset['esip:startDate'] = self._convert_date(caldate)

            rngdate = extract_elem(time_elem, ['rngdates'])
            if rngdate is not None:
                dataset['esip:startDate'] = self._convert_date(
                    extract_item(rngdate, ['begdate']))
                dataset['esip:endDate'] = self._convert_date(
                    extract_item(rngdate, ['enddate']))
            # TODO: add the min/max of the list of dates

        dataset['relationships'] = [
            {
                "relate": "bcube:hasMetadataRecord",
                "object_id": catalog_object_id
            }
        ]

        publisher = {
            "object_id": generate_uuid_urn(),
            "name": extract_item(
                self.elem,
                ['idinfo', 'citation', 'citeinfo', 'pubinfo', 'publish']),
            "location": extract_item(
                self.elem,
                ['idinfo', 'citation', 'citeinfo', 'pubinfo', 'pubplace'])
        }
        output['publisher'] = publisher
        dataset['relationships'].append({
            "relate": "dcterms:publisher",
            "object_id": publisher['object_id']
        })

        distrib_elems = extract_elems(
            self.elem, ['distinfo', 'stdorder', 'digform'])

        for distrib_elem in distrib_elems:
            link = extract_item(
                distrib_elem,
                ['digtopt', 'onlinopt', 'computer', 'networka', 'networkr'])
            # format = extract_item(distrib_elem, ['digtinfo', 'formname'])
            if not link:
                continue
            url_sha = generate_sha_urn(link)
            if url_sha not in urls:
                urls.add(url_sha)
                url_id = generate_uuid_urn()
                dist = self._generate_harvest_manifest(**{
                    "bcube:hasUrlSource": "Harvested",
                    "bcube:hasConfidence": "Good",
                    "vcard:hasURL": link,
                    "object_id": url_id,
                    "dc:identifier": url_sha
                })
                dataset['urls'].append(dist)
                # this is a distribution link so
                # we are assuming it is to data
                dataset['relationships'].append({
                    "relate": "dcterms:references",
                    "object_id": url_id
                })

        webpages = []
        onlink_elems = extract_elems(
            self.elem, ['idinfo', 'citation', 'citeinfo', 'onlink'])
        for onlink_elem in onlink_elems:
            link = onlink_elem.text.strip() if onlink_elem.text else ''
            if not link:
                continue
            url_sha = generate_sha_urn(link)
            if url_sha not in urls:
                urls.add(url_sha)
                url_id = generate_uuid_urn()
                dist = self._generate_harvest_manifest(**{
                    "bcube:hasUrlSource": "Harvested",
                    "bcube:hasConfidence": "Good",
                    "vcard:hasURL": link,
                    "object_id": url_id,
                    "dc:identifier": url_sha
                })
                dataset['urls'].append(dist)
                webpages.append({
                    "object_id": generate_uuid_urn(),
                    "relationships": [
                        {
                            "relate": "dcterms:references",
                            "object_id": url_id
                        }
                    ]}
                )

        output['catalog_record']['webpages'] = webpages
        for webpage in webpages:
            dataset['relationships'].append({
                "relate": "dcterms:references",
                "object_id": webpage['object_id']
            })

        # retain the keyword sets with type, thesaurus name and split
        # the terms as best we can
        keywords = []
        key_elem = extract_elem(self.elem, ['idinfo', 'keywords'])
        # guard against records without a keywords block
        children = key_elem.iterchildren() if key_elem is not None else []
        for child in children:
            key_type = extract_element_tag(child.tag)
            key_tag = 'strat' if key_type == 'stratum' else key_type
            key_tag = 'temp' if key_tag == 'temporal' else key_tag
            thesaurus = extract_item(child, ['%skt' % key_tag])

            # TODO: split these up
            terms = extract_items(child, ['%skey' % key_tag])

            if terms:
                # if there's a parsing error (bad cdata, etc) may not have
                # TODO: add something for a set without a thesaurus name
                keywords.append(
                    tidy_dict({
                        "object_id": generate_uuid_urn(),
                        "dc:partOf": thesaurus,
                        "bcube:hasType": key_type,
                        "bcube:hasValue": terms
                    })
                )
        output['keywords'] = keywords
        for keyword in keywords:
            dataset['relationships'].append(
                {
                    "relate": "dc:conformsTo",
                    "object_id": keyword['object_id']
                }
            )

        output['datasets'] = [dataset]

        # add the metadata relate
        output['catalog_record']['relationships'].append(
            {
                "relate": "foaf:primaryTopic",
                "object_id": dataset_object_id
            }
        )

        output['catalog_records'] = [output['catalog_record']]
        del output['catalog_record']
        self.description = tidy_dict(output)
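generate_uuid_urn and generate_sha_urn supply the object_id and dc:identifier values used throughout this parser. A plausible sketch, assuming URN-style identifiers (the exact prefixes are an assumption):

import hashlib
import uuid


def generate_uuid_urn():
    # random identifier for a new graph node
    return 'urn:uuid:{0}'.format(uuid.uuid4())


def generate_sha_urn(text):
    # deterministic identifier derived from a URL; used above for set membership
    return 'urn:sha:{0}'.format(hashlib.sha256(text.encode('utf-8')).hexdigest())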
    def _parse_getcap_datasets(self, reader):
        '''
        from some content metadata object, get all of the
        layers/features/coverages

        output:
            name
            title
            srs
            bounding boxes
            wgs84 bbox

            style

            metadataurl (should be a relate)

            elevation
            time

        note: the values are lists to handle other service responses
              that may have multiple values for that element.
        '''
        datasets = []

        if reader.contents is None:
            return []

        for name, dataset in reader.contents.items():
            d = {}

            d['name'] = name
            if dataset.title:
                d['title'] = dataset.title

            if dataset.abstract:
                d['abstract'] = dataset.abstract

            if dataset.metadataUrls:
                d['metadata_urls'] = dataset.metadataUrls

            if dataset.boundingBoxes:
                d['bboxes'] = dataset.boundingBoxes

            # if dataset.boundingBoxWGS84:
            #     d['bbox'] = [dataset.boundingBoxWGS84]

            try:
                # convert to wkt (and there's something about sos - maybe not harmonized)
                if dataset.boundingBoxWGS84:
                    bbox = bbox_to_geom(dataset.boundingBoxWGS84)
                    d['bbox'] = {
                        'dc:spatial': to_wkt(bbox),
                        'esip:westBound': dataset.boundingBoxWGS84[0],
                        'esip:eastBound': dataset.boundingBoxWGS84[2],
                        'esip:northBound': dataset.boundingBoxWGS84[3],
                        'esip:southBound': dataset.boundingBoxWGS84[1]
                    }
            except AttributeError:
                pass

            if dataset.crsOptions:
                d['spatial_refs'] = dataset.crsOptions

            if dataset.attribution:
                d['rights'] = [dataset.attribution]

            if dataset.timepositions:
                d['temporal'] = dataset.timepositions

                begin_time, end_time = self._return_timerange(
                    dataset.timepositions)
                d['temporal_extent'] = {
                    "begin": begin_time.isoformat(),
                    "end": end_time.isoformat()
                }

            # SOS 2.0.2 specific attributes
            if 'temporal_extent' not in d:
                d['temporal_extent'] = {}
            try:
                # because it has support for different time element names
                if dataset.begin_position:
                    d['temporal_extent']['begin'] = dataset.begin_position
            except AttributeError:
                pass

            try:
                # because it has support for different time element names
                if dataset.end_position:
                    d['temporal_extent']['end'] = dataset.end_position
            except AttributeError:
                pass

            try:
                if dataset.observed_properties:
                    d['observed_properties'] = dataset.observed_properties
            except AttributeError:
                pass

            try:
                if dataset.procedures:
                    d['procedures'] = dataset.procedures
            except AttributeError:
                pass

            try:
                if dataset.procedure_description_formats:
                    d['procedure_description_formats'] = dataset.procedure_description_formats
            except AttributeError:
                pass

            try:
                if dataset.features_of_interest:
                    d['features_of_interest'] = dataset.features_of_interest
            except AttributeError:
                pass

            try:
                if dataset.observation_models:
                    d['observation_models'] = dataset.observation_models
            except AttributeError:
                pass

            # and some of the WFS-specific bits
            try:
                if dataset.verbOptions:
                    d['verbs'] = dataset.verbOptions
            except AttributeError:
                pass

            # handling the sos vs wfs output formats (ows related)
            try:
                if dataset.outputFormats:
                    d['output_formats'] = dataset.outputFormats
            except AttributeError:
                pass
            try:
                if dataset.response_formats:
                    d['output_formats'] = dataset.response_formats
            except AttributeError:
                pass

            datasets.append(d)

        return datasets
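self._return_timerange is referenced above but not shown; assuming the dateparser used below is dateutil.parser, it is expected to reduce a list of time-position strings (possibly 'begin/end/resolution' style) to a (begin, end) pair of datetimes, roughly like this sketch:

from dateutil import parser as dateparser


def _return_timerange(self, timepositions):
    # parse whatever parses and return the overall min/max;
    # assumes at least one position is a valid date
    parsed = []
    for position in timepositions:
        for part in str(position).split('/')[:2]:
            try:
                parsed.append(dateparser.parse(part))
            except (ValueError, OverflowError):
                continue
    return min(parsed), max(parsed)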
    def _parse_coverages(self, reader):
        def _return_timerange(start_range, end_range):
            try:
                start_date = dateparser.parse(start_range)
            except Exception:
                start_date = None
            try:
                end_date = dateparser.parse(end_range)
            except Exception:
                end_date = None

            return start_date, end_date

        datasets = []

        if reader.coverages is None:
            return []

        for coverage in reader.coverages:
            d = {}
            d['name'] = coverage.name
            if coverage.description:
                d['abstract'] = coverage.description

            if coverage.min_pos and coverage.max_pos:
                # TODO: translate this to a bbox
                min_pos = coverage.min_pos
                max_pos = coverage.max_pos
                crs_urn = coverage.srs_urn

                # use lists (not Py3 map objects) so they can be concatenated below
                min_coord = [float(v) for v in min_pos.split()]
                max_coord = [float(v) for v in max_pos.split()]

                bbox = bbox_to_geom(min_coord + max_coord)
                # TODO: there's an issue with the gdal_data path
                #       where it is not finding the epsg registry
                # bbox = reproject(bbox, crs_urn, 'EPSG:4326')

                d['bbox'] = to_wkt(bbox)

            # TODO: what to do about the main envelope vs all the domainSet bboxes?
            try:
                if coverage.temporal_domain:
                    begin_range = coverage.temporal_domain.get(
                        'begin_position', '')
                    end_range = coverage.temporal_domain.get(
                        'end_position', '')
                    begin_time, end_time = _return_timerange(
                        begin_range, end_range)
                    d['temporal_extent'] = {
                        "begin": begin_time.isoformat(),
                        "end": end_time.isoformat()
                    }
            except AttributeError:
                pass

            try:
                if coverage.supported_formats:
                    d['formats'] = coverage.supported_formats
            except AttributeError:
                pass

            try:
                if coverage.supported_crs:
                    d['spatial_refs'] = coverage.supported_crs
            except AttributeError:
                pass

            datasets.append(d)

        return datasets
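The corner handling above concatenates the parsed minimum and maximum positions, which yields the same [west, south, east, north] ordering used by the other parsers; for example (values are illustrative):

min_pos, max_pos = '-180.0 -90.0', '180.0 90.0'   # envelope corner text, e.g. GML lowerCorner / upperCorner
min_coord = [float(v) for v in min_pos.split()]
max_coord = [float(v) for v in max_pos.split()]
assert min_coord + max_coord == [-180.0, -90.0, 180.0, 90.0]  # [west, south, east, north]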