def add_to_xml_container(self, container):
    """Generate the xml+rdf representation of all the metadata elements associated
    with this logical file type instance.

    Subclass must override this if it has additional metadata elements.

    :param container: the etree element the generated nodes are appended to
    :return: the rdf:Description element holding this dataset's metadata
    """
    NAMESPACES = CoreMetaData.NAMESPACES

    def _qname(prefix, tag):
        # Build a namespace-qualified element name, e.g. '{http://...}Dataset'.
        return '{%s}%s' % (NAMESPACES[prefix], tag)

    dataset_node = etree.SubElement(container, _qname('hsterms', 'Dataset'))
    description_node = etree.SubElement(dataset_node, _qname('rdf', 'Description'))

    # dc:type references the term URL for this logical file's data type
    type_node = etree.SubElement(description_node, _qname('dc', 'type'))
    data_type = current_site_url() + "/terms/" + self.logical_file.data_type
    type_node.set(_qname('rdf', 'resource'), data_type)

    if self.logical_file.dataset_name:
        title_node = etree.SubElement(description_node, _qname('dc', 'title'))
        title_node.text = self.logical_file.dataset_name

    # add one dataFile node per resource file in this logical file
    for res_file in self.logical_file.files.all():
        datafile_node = etree.SubElement(description_node, _qname('hsterms', 'dataFile'))
        file_description = etree.SubElement(datafile_node, _qname('rdf', 'Description'))
        file_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=current_site_url(),
            res_id=self.logical_file.resource.short_id,
            file_name=res_file.short_path)
        file_description.set(_qname('rdf', 'about'), file_uri)
        file_title = etree.SubElement(file_description, _qname('dc', 'title'))
        file_title.text = get_resource_file_name_and_extension(res_file)[1]
        file_format = etree.SubElement(file_description, _qname('dc', 'format'))
        file_format.text = res_file.mime_type

    self.add_keywords_to_xml_container(description_node)
    self.add_extra_metadata_to_xml_container(description_node)
    for coverage in self.coverages.all():
        coverage.add_to_xml_container(description_node)
    return description_node
def get_xml(self, pretty_print=True):
    """Return the RDF/XML string for this metadata, including the model-specific
    ModelOutput and ExecutedBy elements.

    :param pretty_print: when True, the returned XML string is indented for
        readability (was previously ignored — see BUGFIX below)
    :return: the serialized RDF/XML as produced by lxml's etree.tostring
    """
    from lxml import etree

    # get the xml string representation of the core metadata elements
    xml_string = super(ModelInstanceMetaData, self).get_xml(pretty_print=False)

    # create an etree xml object
    RDF_ROOT = etree.fromstring(xml_string)

    # get root 'Description' element that contains all other elements
    container = RDF_ROOT.find('rdf:Description', namespaces=self.NAMESPACES)

    if self.model_output:
        hsterms_model_output = etree.SubElement(
            container, '{%s}ModelOutput' % self.NAMESPACES['hsterms'])
        hsterms_model_output_rdf_Description = etree.SubElement(
            hsterms_model_output, '{%s}Description' % self.NAMESPACES['rdf'])
        hsterms_model_output_value = etree.SubElement(
            hsterms_model_output_rdf_Description,
            '{%s}IncludesModelOutput' % self.NAMESPACES['hsterms'])
        # idiomatic truthiness check instead of `== True`
        # (includes_output is used as a boolean flag here)
        hsterms_model_output_value.text = "Yes" if self.model_output.includes_output else "No"

    if self.executed_by:
        hsterms_executed_by = etree.SubElement(
            container, '{%s}ExecutedBy' % self.NAMESPACES['hsterms'])
        hsterms_executed_by_rdf_Description = etree.SubElement(
            hsterms_executed_by, '{%s}Description' % self.NAMESPACES['rdf'])
        hsterms_executed_by_name = etree.SubElement(
            hsterms_executed_by_rdf_Description,
            '{%s}ModelProgramName' % self.NAMESPACES['hsterms'])
        # fall back to placeholder text when no model program is linked
        title = self.executed_by.model_program_fk.title \
            if self.executed_by.model_program_fk else "Unspecified"
        hsterms_executed_by_name.text = title
        hsterms_executed_by_url = etree.SubElement(
            hsterms_executed_by_rdf_Description,
            '{%s}ModelProgramURL' % self.NAMESPACES['hsterms'])
        url = '%s%s' % (utils.current_site_url(),
                        self.executed_by.model_program_fk.get_absolute_url()) \
            if self.executed_by.model_program_fk else "None"
        hsterms_executed_by_url.text = url

    # BUGFIX: honor the pretty_print argument instead of hard-coding True,
    # so callers requesting compact output actually get it
    return etree.tostring(RDF_ROOT, pretty_print=pretty_print)
def modelProgramIdentifier(self):
    """Return the absolute URL of the linked model program, or the string
    "None" when no model program is linked."""
    program = self.model_program_fk
    if not program:
        return "None"
    return '%s%s' % (utils.current_site_url(), program.get_absolute_url())
import os import tempfile import csv import shutil import logging from django.core.files.uploadedfile import UploadedFile from hs_core.hydroshare.utils import resource_modified, current_site_url from hs_core.hydroshare.resource import delete_resource_file_only, add_resource_files logger = logging.getLogger(__name__) RES_LANDING_PAGE_URL_TEMPLATE = current_site_url() + "/resource/{0}/" CSV_FULL_NAME_TEMPLATE = "collection_list_{0}.csv" DELETED_RES_STRING = "Resource Deleted" def add_or_remove_relation_metadata(add=True, target_res_obj=None, relation_type="", relation_value="", set_res_modified=False, last_change_user=None): """ add new or remove relation metadata to/from target res obj :param add: True -- add metadata; False -- remove metadata :param target_res_obj: the target res obj to receive the change :param relation_type: "hasPart" or "isPartOf" :param relation_value: value of relation :param set_res_modified: set bag modified flag to True or False :param last_change_user: the User obj represents the last_change_by user
def create_bag_files(resource):
    """
    create and update files needed by bagit operation that is conducted on iRODS
    server; no bagit operation is performed, only files that will be included in
    the bag are created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be
        included in the resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent
        operation to create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource
    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    # NOTE(review): getUniqueTmpPath is accessed without calling it — presumably a
    # property on IrodsStorage; confirm against that class.
    temp_path = istorage.getUniqueTmpPath

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            # path already exists (likely from a concurrent/aborted run) — recreate it
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            # BUGFIX: exceptions have no .message attribute on Python 3 (it was
            # deprecated since Python 2.6); use str(ex) to preserve the message text.
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the
    # future. The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # write resource level metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    # BUGFIX: use a distinct local name instead of shadowing the imported
    # current_site_url() function with its own return value
    site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource
    # and add it to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"
    a.add_resource(resMetaFile)

    # Add the resource files to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)
    for f in files:
        # only the files that are not part of file type aggregation (logical file)
        # should be added to the resource level map xml file
        if f.logical_file is None:
            res_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=site_url,
                res_id=resource.short_id,
                file_name=f.short_path)
            ar = AggregatedResource(res_uri)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = get_file_mime_type(os.path.basename(f.short_path))
            a.add_resource(ar)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=site_url,
                res_id=contained_res_id)
            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)
    elif resource.resource_type == "CompositeResource":
        # add file type aggregations to resource aggregation
        for logical_file in resource.logical_files:
            if logical_file.has_parent:
                # skip nested aggregations
                continue
            aggr_uri = u'{hs_url}/resource/{res_id}/data/contents/{map_file_path}#aggregation'
            aggr_uri = aggr_uri.format(
                hs_url=site_url,
                res_id=resource.short_id,
                map_file_path=logical_file.map_short_file_path)
            agg = Aggregation(aggr_uri)
            agg._ore.isAggregatedBy = ag_url
            agg_type_url = "{site}/terms/{aggr_type}"
            agg_type_url = agg_type_url.format(
                site=site_url,
                aggr_type=logical_file.get_aggregation_type_name())
            agg._dcterms.type = URIRef(agg_type_url)
            a.add_resource(agg)

    # Register a serializer with the aggregation, which creates a new ResourceMap
    # that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    # if the resource is a composite resource generate aggregation metadata
    # and map xml documents
    if resource.resource_type == "CompositeResource":
        resource.create_aggregation_xml_documents()

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
def create_bag_files(resource):
    """
    create and update files needed by bagit operation that is conducted on iRODS
    server; no bagit operation is performed, only files that will be included in
    the bag are created or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be
        included in the resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent
        operation to create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource
    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = os.path.join(getattr(settings, 'IRODS_ROOT', '/tmp'), uuid4().hex)

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            # path already exists (likely from a concurrent/aborted run) — recreate it
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            # BUGFIX: exceptions have no .message attribute on Python 3 (it was
            # deprecated since Python 2.6); use str(ex) to preserve the message text.
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the
    # future. The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # resources that don't support file types this would write only resource level metadata
        # resource types that support file types this would write resource level metadata
        # as well as file type metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    # BUGFIX: use a distinct local name instead of shadowing the imported
    # current_site_url() function with its own return value
    site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemetadata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource
    # and add it to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"
    a.add_resource(resMetaFile)

    # Create a description of each content file and add it to the aggregation.
    # IDIOM: build and add each AggregatedResource directly instead of collecting
    # them in an index-addressed list first; the aggregation order (metadata
    # document first, then files) is unchanged.
    files = ResourceFile.objects.filter(object_id=resource.id)
    for f in files:
        res_uri = '{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=site_url,
            res_id=resource.short_id,
            file_name=f.short_path)
        ar = AggregatedResource(res_uri)
        ar._ore.isAggregatedBy = ag_url
        ar._dc.format = get_file_mime_type(os.path.basename(f.short_path))
        a.add_resource(ar)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=site_url,
                res_id=contained_res_id)
            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)

    # Register a serializer with the aggregation, which creates a new ResourceMap
    # that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
def test_get_current_site_url(self):
    """current_site_url() should return '<protocol>://<domain>' built from the
    MY_SITE_PROTOCOL setting (default 'http') and the current Site's domain."""
    current_site = Site.objects.get_current()
    protocol = getattr(settings, 'MY_SITE_PROTOCOL', 'http')
    url = '%s://%s' % (protocol, current_site.domain)
    # FIX: assertEquals is a deprecated alias; use assertEqual
    self.assertEqual(utils.current_site_url(), url)
import os import tempfile import csv import shutil import logging from django.core.files.uploadedfile import UploadedFile from hs_core.hydroshare.utils import resource_modified, current_site_url from hs_core.hydroshare.resource import delete_resource_file_only, add_resource_files logger = logging.getLogger(__name__) RES_LANDING_PAGE_URL_TEMPLATE = current_site_url() + "/resource/{0}/" CSV_FULL_NAME_TEMPLATE = "collection_list_{0}.csv" DELETED_RES_STRING = "Resource Deleted" def add_or_remove_relation_metadata(add=True, target_res_obj=None, relation_type="", relation_value="", set_res_modified=False, last_change_user=None): """ add new or remove relation metadata to/from target res obj :param add: True -- add metadata; False -- remove metadata :param target_res_obj: the target res obj to receive the change :param relation_type: "hasPart" or "isPartOf" :param relation_value: value of relation :param set_res_modified: set bag modified flag to True or False :param last_change_user: the User obj represents the last_change_by user (only works when set_res_modified is True) :return: """