def test_parsing_resource_map():
    pid = 'resourceMap_df35d.3.2'

    aggd_pids = dataone.getAggregatedIdentifiers(pid)

    assert len(aggd_pids) == 7
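
# A minimal, hypothetical sketch of what the test above exercises: dereferencing a
# resource map (an OAI-ORE document serialized as RDF/XML) and collecting the
# identifiers of the objects it aggregates. The endpoint URL, the use of rdflib,
# and this function body are assumptions for illustration, not the project's
# actual dataone.getAggregatedIdentifiers implementation.
import urllib

import rdflib

ORE_AGGREGATES = rdflib.URIRef("http://www.openarchives.org/ore/terms/aggregates")
DCTERMS_IDENTIFIER = rdflib.URIRef("http://purl.org/dc/terms/identifier")


def getAggregatedIdentifiers_sketch(pid):
    """Return the identifiers of every object aggregated by the resource map `pid`."""
    url = "https://cn.dataone.org/cn/v1/object/" + urllib.quote_plus(pid)

    graph = rdflib.Graph()
    graph.parse(url, format="xml")  # resource maps are RDF/XML

    identifiers = []

    for aggregated in graph.objects(None, ORE_AGGREGATES):
        # Each aggregated resource carries its PID as a dcterms:identifier.
        for identifier in graph.objects(aggregated, DCTERMS_IDENTIFIER):
            identifiers.append(str(identifier))

    return identifiers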
def addDatasetTriples(self, dataset_node, doc):
    if self.model is None:
        raise Exception("Model not found.")

    identifier = dataone.extractDocumentIdentifier(doc)
    identifier_esc = urllib.quote_plus(identifier)

    # type Dataset
    self.add(dataset_node, 'rdf:type', 'geolink:Dataset')

    # Title
    title_element = doc.find("./str[@name='title']")

    if title_element is not None:
        self.add(dataset_node, 'rdfs:label', RDF.Node(title_element.text))

    # Add geolink:Identifier
    self.addIdentifierTriples(dataset_node, identifier)

    # Abstract
    abstract_element = doc.find("./str[@name='abstract']")

    if abstract_element is not None:
        self.add(dataset_node, 'geolink:description', RDF.Node(abstract_element.text))

    # Spatial Coverage
    bound_north = doc.find("./float[@name='northBoundCoord']")
    bound_east = doc.find("./float[@name='eastBoundCoord']")
    bound_south = doc.find("./float[@name='southBoundCoord']")
    bound_west = doc.find("./float[@name='westBoundCoord']")

    if all(ele is not None for ele in [bound_north, bound_east, bound_south, bound_west]):
        if bound_north.text == bound_south.text and bound_west.text == bound_east.text:
            wktliteral = "POINT (%s %s)" % (bound_north.text, bound_east.text)
        else:
            wktliteral = "POLYGON ((%s %s, %s %s, %s %s, %s %s))" % (
                bound_west.text, bound_north.text,
                bound_east.text, bound_north.text,
                bound_east.text, bound_south.text,
                bound_west.text, bound_south.text)

        self.add(dataset_node, 'geolink:hasGeometryAsWktLiteral', RDF.Node(wktliteral))

    # Temporal Coverage
    begin_date = doc.find("./date[@name='beginDate']")
    end_date = doc.find("./date[@name='endDate']")

    if begin_date is not None:
        self.add(dataset_node, 'geolink:hasStartDate', RDF.Node(begin_date.text))

    if end_date is not None:
        self.add(dataset_node, 'geolink:hasEndDate', RDF.Node(end_date.text))

    # Obsoletes as PROV#wasRevisionOf
    obsoletes_node = doc.find("./str[@name='obsoletes']")

    if obsoletes_node is not None:
        other_document_esc = urllib.quote_plus(obsoletes_node.text)
        self.add(dataset_node, 'prov:wasRevisionOf', RDF.Uri(self.repository.ns['d1dataset'] + other_document_esc))

    # Landing page
    self.add(dataset_node, 'geolink:hasLandingPage', RDF.Uri("https://search.dataone.org/#view/" + identifier_esc))

    # Digital Objects
    # If this document has a resource map, get digital objects from there.
    # Otherwise, use the cito:documents field in Solr.
    resource_map_identifiers = doc.findall("./arr[@name='resourceMap']/str")

    if len(resource_map_identifiers) > 0:
        for resource_map_node in resource_map_identifiers:
            resource_map_identifier = resource_map_node.text

            digital_objects = dataone.getAggregatedIdentifiers(resource_map_identifier)

            for digital_object in digital_objects:
                digital_object_identifier = urllib.unquote_plus(digital_object)
                self.addDigitalObject(identifier, digital_object_identifier)
    else:
        # If no resourceMap or documents field, at least add the metadata
        # file as a digital object
        # dataUrl e.g. https://cn.dataone.org/cn/v1/resolve/doi%3A10.6073%2FAA%2Fknb-lter-cdr.70061.123
        data_url_node = doc.find("./str[@name='dataUrl']")

        if data_url_node is not None:
            data_url = data_url_node.text

            digital_object = dataone.extractIdentifierFromFullURL(data_url)
            digital_object = urllib.unquote_plus(digital_object)
            self.addDigitalObject(identifier, digital_object)
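
# A standalone illustration (hypothetical helper name) of the spatial-coverage
# logic above: when the north/south and east/west bounds coincide, the coverage
# collapses to a POINT; otherwise the four bounds become a POLYGON ring in the
# same coordinate order used by addDatasetTriples.
def wkt_from_bounds(north, east, south, west):
    if north == south and west == east:
        return "POINT (%s %s)" % (north, east)

    return "POLYGON ((%s %s, %s %s, %s %s, %s %s))" % (
        west, north,
        east, north,
        east, south,
        west, south)


print wkt_from_bounds("45.0", "-93.0", "45.0", "-93.0")
# POINT (45.0 -93.0)
print wkt_from_bounds("46.0", "-92.0", "44.0", "-94.0")
# POLYGON ((-94.0 46.0, -92.0 46.0, -92.0 44.0, -94.0 44.0))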