Python extractIdentifierFromFullURL Examples

Programming Language: Python

Namespace/Package Name: d1lod.dataone

Method/Function: extractIdentifierFromFullURL

Examples at hotexamples.com: 2

Python extractIdentifierFromFullURL - 2 examples found. These are the top-rated real-world Python examples of d1lod.dataone.extractIdentifierFromFullURL extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: test_dataone.py Project: rushirajnenuji/d1lod

def test_extracting_identifiers_from_urls():
    """Check extractIdentifierFromFullURL against non-matching and matching inputs."""
    extract = dataone.extractIdentifierFromFullURL

    # Inputs for which the function is expected to return None
    for not_a_match in ('asdf', 1, '1', 'http://google.com'):
        assert extract(not_a_match) is None

    # Full URLs paired with the identifier that should be pulled out of them
    expectations = (
        ('https://cn.dataone.org/cn/v1/meta/some_pid', 'some_pid'),
        ('https://cn.dataone.org/cn/v1/meta/kgordon.23.30', 'kgordon.23.30'),
        ('https://cn.dataone.org/cn/v1/resolve/kgordon.23.30', 'kgordon.23.30'),
        ('https://cn.dataone.org/cn/v1/object/kgordon.23.30', 'kgordon.23.30'),
        ('https://cn.dataone.org/cn/v2/object/kgordon.23.30', 'kgordon.23.30'),
    )

    for full_url, expected_identifier in expectations:
        assert extract(full_url) == expected_identifier

Example #2

Show file

File: interface.py Project: ec-geolink/d1lod

    def addDatasetTriples(self, dataset_node, doc):
        """Add the triples describing one dataset document.

        Looks up fields on `doc` with find()/findall() and, for each field
        that is present, adds a triple with `dataset_node` as the subject:
        type, title, identifier, abstract, spatial coverage, temporal
        coverage, the obsoleted revision, the landing page and the dataset's
        digital objects.

        Arguments:
            dataset_node: Subject node passed to self.add() for every triple.
            doc: Element-like object supporting find()/findall() with
                path expressions (presumably a Solr result document --
                confirm against the caller).

        Raises:
            Exception: If self.model is None.
        """
        if self.model is None:
            raise Exception("Model not found.")

        identifier = dataone.extractDocumentIdentifier(doc)
        identifier_esc = urllib.quote_plus(identifier)

        # type Dataset
        self.add(dataset_node, 'rdf:type', 'geolink:Dataset')

        # Title
        title_element = doc.find("./str[@name='title']")

        if title_element is not None:
            self.add(dataset_node, 'rdfs:label', RDF.Node(title_element.text))

        # Add geolink:Identifier
        self.addIdentifierTriples(dataset_node, identifier)

        # Abstract
        abstract_element = doc.find("./str[@name='abstract']")

        if abstract_element is not None:
            self.add(dataset_node, 'geolink:description', RDF.Node(abstract_element.text))

        # Spatial Coverage
        bound_north = doc.find("./float[@name='northBoundCoord']")
        bound_east = doc.find("./float[@name='eastBoundCoord']")
        bound_south = doc.find("./float[@name='southBoundCoord']")
        bound_west = doc.find("./float[@name='westBoundCoord']")

        # Only emit a geometry when all four bounds are available
        if all(ele is not None for ele in [bound_north, bound_east, bound_south, bound_west]):
            if bound_north.text == bound_south.text and bound_west.text == bound_east.text:
                # Degenerate bounding box: represent it as a single point.
                # NOTE(review): this is (north, east) while the polygon below
                # is (west/east, north/south) -- confirm the intended axis
                # order before changing either.
                wktliteral = "POINT (%s %s)" % (bound_north.text, bound_east.text)
            else:
                # Corners in order: NW, NE, SE, SW. Each pair is separated
                # from its neighbours by a comma and its two coordinates by a
                # space (the last pair previously had a stray comma).
                # NOTE(review): the ring does not repeat its first point;
                # confirm whether consumers require a closed ring.
                wktliteral = "POLYGON ((%s %s, %s %s, %s %s, %s %s))" % (
                    bound_west.text, bound_north.text,
                    bound_east.text, bound_north.text,
                    bound_east.text, bound_south.text,
                    bound_west.text, bound_south.text)

            self.add(dataset_node, 'geolink:hasGeometryAsWktLiteral', RDF.Node(wktliteral))

        # Temporal Coverage
        begin_date = doc.find("./date[@name='beginDate']")
        end_date = doc.find("./date[@name='endDate']")

        if begin_date is not None:
            self.add(dataset_node, 'geolink:hasStartDate', RDF.Node(begin_date.text))

        if end_date is not None:
            self.add(dataset_node, 'geolink:hasEndDate', RDF.Node(end_date.text))

        # Obsoletes as PROV#wasRevisionOf
        obsoletes_node = doc.find("./str[@name='obsoletes']")

        if obsoletes_node is not None:
            other_document_esc = urllib.quote_plus(obsoletes_node.text)
            self.add(dataset_node, 'prov:wasRevisionOf', RDF.Uri(self.repository.ns['d1dataset'] + other_document_esc))

        # Landing page
        self.add(dataset_node, 'geolink:hasLandingPage', RDF.Uri("https://search.dataone.org/#view/" + identifier_esc))

        # Digital Objects
        # If this document has a resource map, get digital objects from there
        # Otherwise, use the cito:documents field in Solr

        resource_map_identifiers = doc.findall("./arr[@name='resourceMap']/str")

        if len(resource_map_identifiers) > 0:
            for resource_map_node in resource_map_identifiers:
                resource_map_identifier = resource_map_node.text

                digital_objects = dataone.getAggregatedIdentifiers(resource_map_identifier)

                for digital_object in digital_objects:
                    digital_object_identifier = urllib.unquote_plus(digital_object)
                    self.addDigitalObject(identifier, digital_object_identifier)
        else:
            # If no resourceMap or documents field, at least add the metadata
            # file as a digital object
            # dataUrl e.g. https://cn.dataone.org/cn/v1/resolve/doi%3A10.6073%2FAA%2Fknb-lter-cdr.70061.123

            data_url_node = doc.find("./str[@name='dataUrl']")

            if data_url_node is not None:
                data_url = data_url_node.text
                digital_object = dataone.extractIdentifierFromFullURL(data_url)

                # extractIdentifierFromFullURL returns None for URLs it does
                # not recognise; skip those rather than handing None to
                # unquote_plus, which would raise.
                if digital_object is not None:
                    digital_object = urllib.unquote_plus(digital_object)
                    self.addDigitalObject(identifier, digital_object)