def add_to_xml_container(self, container):
    """Add the xml+rdf representation of this logical file type's metadata
    elements to *container* and return the inner rdf:Description element.

    Subclasses with additional metadata elements must override this.
    """
    ns = CoreMetaData.NAMESPACES

    dataset = etree.SubElement(container, '{%s}Dataset' % ns['hsterms'])
    description = etree.SubElement(dataset, '{%s}Description' % ns['rdf'])

    # dc:type points at the term URI for this aggregation's data type
    type_node = etree.SubElement(description, '{%s}type' % ns['dc'])
    type_node.set('{%s}resource' % ns['rdf'],
                  current_site_url() + "/terms/" + self.logical_file.data_type)

    if self.logical_file.dataset_name:
        title_node = etree.SubElement(description, '{%s}title' % ns['dc'])
        title_node.text = self.logical_file.dataset_name

    # one hsterms:dataFile node per resource file in this aggregation
    for res_file in self.logical_file.files.all():
        data_file = etree.SubElement(description,
                                     '{%s}dataFile' % ns['hsterms'])
        file_desc = etree.SubElement(data_file,
                                     '{%s}Description' % ns['rdf'])
        file_desc.set(
            '{%s}about' % ns['rdf'],
            u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=current_site_url(),
                res_id=self.logical_file.resource.short_id,
                file_name=res_file.short_path))

        file_title = etree.SubElement(file_desc, '{%s}title' % ns['dc'])
        file_title.text = get_resource_file_name_and_extension(res_file)[1]

        file_format = etree.SubElement(file_desc, '{%s}format' % ns['dc'])
        file_format.text = res_file.mime_type

    self.add_keywords_to_xml_container(description)
    self.add_extra_metadata_to_xml_container(description)
    for coverage in self.coverages.all():
        coverage.add_to_xml_container(description)
    return description
Example #2
0
    def get_xml(self, pretty_print=True):
        """Return the xml+rdf string for this model instance metadata.

        Extends the core metadata xml with the ModelOutput and ExecutedBy
        elements when those are present.

        :param pretty_print: if True, the returned xml string is pretty printed
        :return: xml string of the complete metadata document
        """
        from lxml import etree

        # get the xml string representation of the core metadata elements
        xml_string = super(ModelInstanceMetaData, self).get_xml(pretty_print=False)

        # create an etree xml object
        RDF_ROOT = etree.fromstring(xml_string)

        # get root 'Description' element that contains all other elements
        container = RDF_ROOT.find('rdf:Description', namespaces=self.NAMESPACES)

        if self.model_output:
            hsterms_model_output = etree.SubElement(
                container, '{%s}ModelOutput' % self.NAMESPACES['hsterms'])
            hsterms_model_output_rdf_Description = etree.SubElement(
                hsterms_model_output, '{%s}Description' % self.NAMESPACES['rdf'])
            hsterms_model_output_value = etree.SubElement(
                hsterms_model_output_rdf_Description,
                '{%s}IncludesModelOutput' % self.NAMESPACES['hsterms'])
            # test truthiness instead of '== True' (PEP 8 idiom)
            hsterms_model_output_value.text = \
                "Yes" if self.model_output.includes_output else "No"
        if self.executed_by:
            hsterms_executed_by = etree.SubElement(
                container, '{%s}ExecutedBy' % self.NAMESPACES['hsterms'])
            hsterms_executed_by_rdf_Description = etree.SubElement(
                hsterms_executed_by, '{%s}Description' % self.NAMESPACES['rdf'])
            hsterms_executed_by_name = etree.SubElement(
                hsterms_executed_by_rdf_Description,
                '{%s}ModelProgramName' % self.NAMESPACES['hsterms'])

            title = self.executed_by.model_program_fk.title \
                if self.executed_by.model_program_fk else "Unspecified"
            hsterms_executed_by_name.text = title

            hsterms_executed_by_url = etree.SubElement(
                hsterms_executed_by_rdf_Description,
                '{%s}ModelProgramURL' % self.NAMESPACES['hsterms'])

            url = '%s%s' % (utils.current_site_url(),
                            self.executed_by.model_program_fk.get_absolute_url()) \
                if self.executed_by.model_program_fk else "None"

            hsterms_executed_by_url.text = url

        # BUGFIX: honor the caller-supplied pretty_print argument instead of
        # hard-coding pretty_print=True
        return etree.tostring(RDF_ROOT, pretty_print=pretty_print)
Example #3
0
 def modelProgramIdentifier(self):
     """Return the full URL identifying the linked model program, or the
     string "None" when no model program is linked."""
     if not self.model_program_fk:
         return "None"
     return '%s%s' % (utils.current_site_url(),
                      self.model_program_fk.get_absolute_url())
Example #4
0
import os
import tempfile
import csv
import shutil
import logging

from django.core.files.uploadedfile import UploadedFile

from hs_core.hydroshare.utils import resource_modified, current_site_url
from hs_core.hydroshare.resource import delete_resource_file_only, add_resource_files

# Module-level logger for this collection-utility module.
logger = logging.getLogger(__name__)
# Landing-page URL for a resource; format with the resource short id.
RES_LANDING_PAGE_URL_TEMPLATE = current_site_url() + "/resource/{0}/"
# File name for the generated CSV listing; format with an id.
CSV_FULL_NAME_TEMPLATE = "collection_list_{0}.csv"
# Marker string for resources that no longer exist — presumably written into
# the generated CSV listing; confirm against the callers of this module.
DELETED_RES_STRING = "Resource Deleted"


def add_or_remove_relation_metadata(add=True,
                                    target_res_obj=None,
                                    relation_type="",
                                    relation_value="",
                                    set_res_modified=False,
                                    last_change_user=None):
    """
    add new or remove relation metadata to/from target res obj
    :param add: True -- add metadata; False -- remove metadata
    :param target_res_obj: the target res obj to receive the change
    :param relation_type: "hasPart" or "isPartOf"
    :param relation_value: value of relation
    :param set_res_modified: set bag modified flag to True or False
    :param last_change_user: the User obj represents the last_change_by user
Example #5
0
def create_bag_files(resource):
    """
    create and update files needed by bagit operation that is conducted on iRODS server;
    no bagit operation is performed, only files that will be included in the bag are created
    or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be included in the
    resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent operation to
    create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = istorage.getUniqueTmpPath

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            # BUGFIX: OSError has no 'message' attribute on Python 3;
            # str(ex) works on both Python 2 and 3.
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the future.
    # The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # write resource level metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    # BUGFIX: use a distinct local name so the imported current_site_url()
    # function is not shadowed by its own return value.
    site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(site_url, 'resource', resource.short_id, 'data')
    # this is the path to the resourcemedata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource and add it
    # to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"
    a.add_resource(resMetaFile)

    # Add the resource files to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)

    for f in files:
        # only the files that are not part of file type aggregation (logical file)
        # should be added to the resource level map xml file
        if f.logical_file is None:
            res_uri = u'{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
                hs_url=site_url,
                res_id=resource.short_id,
                file_name=f.short_path)
            ar = AggregatedResource(res_uri)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = get_file_mime_type(os.path.basename(f.short_path))
            a.add_resource(ar)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                    hs_url=site_url,
                    res_id=contained_res_id)

            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)
    elif resource.resource_type == "CompositeResource":
        # add file type aggregations to resource aggregation
        for logical_file in resource.logical_files:
            if logical_file.has_parent:
                # skip nested aggregations
                continue
            aggr_uri = u'{hs_url}/resource/{res_id}/data/contents/{map_file_path}#aggregation'
            aggr_uri = aggr_uri.format(
                hs_url=site_url,
                res_id=resource.short_id,
                map_file_path=logical_file.map_short_file_path)
            agg = Aggregation(aggr_uri)
            agg._ore.isAggregatedBy = ag_url
            agg_type_url = "{site}/terms/{aggr_type}"
            agg_type_url = agg_type_url.format(site=site_url,
                                               aggr_type=logical_file.get_aggregation_type_name())
            agg._dcterms.type = URIRef(agg_type_url)
            a.add_resource(agg)

    # Register a serializer with the aggregation, which creates a new ResourceMap that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' % str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    # if the resource is a composite resource generate aggregation metadata
    # and map xml documents
    if resource.resource_type == "CompositeResource":
        resource.create_aggregation_xml_documents()

    # mark the iRODS collection's metadata as clean and drop the local scratch dir
    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
Example #6
0
def create_bag_files(resource):
    """
    create and update files needed by bagit operation that is conducted on iRODS server;
    no bagit operation is performed, only files that will be included in the bag are created
    or updated.

    Parameters:
    :param resource: A resource whose files will be created or updated to be included in the
    resource bag.
    :return: istorage, an IrodsStorage object that will be used by subsequent operation to
    create a bag on demand as needed.
    """
    from hs_core.hydroshare.utils import current_site_url, get_file_mime_type

    istorage = resource.get_irods_storage()

    # the temp_path is a temporary holding path to make the files available to iRODS
    # we have to make temp_path unique even for the same resource with same update time
    # to accommodate asynchronous multiple file move operations for the same resource

    # TODO: This is always in /tmp; otherwise code breaks because open() is called on the result!
    temp_path = os.path.join(getattr(settings, 'IRODS_ROOT', '/tmp'),
                             uuid4().hex)

    try:
        os.makedirs(temp_path)
    except OSError as ex:
        # TODO: there might be concurrent operations.
        if ex.errno == errno.EEXIST:
            shutil.rmtree(temp_path)
            os.makedirs(temp_path)
        else:
            # BUGFIX: OSError has no 'message' attribute on Python 3;
            # str(ex) works on both Python 2 and 3.
            raise Exception(str(ex))

    # an empty visualization directory will not be put into the zipped bag file by ibun command,
    # so creating an empty visualization directory to be put into the zip file as done by the two
    # statements below does not work. However, if visualization directory has content to be
    # uploaded, it will work. This is to be implemented as part of the resource model in the future.
    # The following two statements are placeholders serving as reminder
    # to_file_name = '{res_id}/data/visualization/'.format(res_id=resource.short_id)
    # istorage.saveFile('', to_file_name, create_directory=True)

    # create resourcemetadata.xml in local directory and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemetadata.xml')
    with open(from_file_name, 'w') as out:
        # resources that don't support file types this would write only resource level metadata
        # resource types that support file types this would write resource level metadata
        # as well as file type metadata
        out.write(resource.get_metadata_xml())
    to_file_name = os.path.join(resource.root_path, 'data',
                                'resourcemetadata.xml')
    istorage.saveFile(from_file_name, to_file_name, True)

    # URLs are found in the /data/ subdirectory to comply with bagit format assumptions
    # BUGFIX: use a distinct local name so the imported current_site_url()
    # function is not shadowed by its own return value.
    site_url = current_site_url()
    # This is the qualified resource url.
    hs_res_url = os.path.join(site_url, 'resource', resource.short_id,
                              'data')
    # this is the path to the resourcemedata file for download
    metadata_url = os.path.join(hs_res_url, 'resourcemetadata.xml')
    # this is the path to the resourcemap file for download
    res_map_url = os.path.join(hs_res_url, 'resourcemap.xml')

    # make the resource map:
    utils.namespaces['citoterms'] = Namespace('http://purl.org/spar/cito/')
    utils.namespaceSearchOrder.append('citoterms')

    ag_url = os.path.join(hs_res_url, 'resourcemap.xml#aggregation')
    a = Aggregation(ag_url)

    # Set properties of the aggregation
    a._dc.title = resource.metadata.title.value
    a._dcterms.type = URIRef(resource.metadata.type.url)
    a._citoterms.isDocumentedBy = metadata_url
    a._ore.isDescribedBy = res_map_url

    res_type_aggregation = AggregatedResource(resource.metadata.type.url)
    res_type_aggregation._rdfs.label = resource._meta.verbose_name
    res_type_aggregation._rdfs.isDefinedBy = site_url + "/terms"

    a.add_resource(res_type_aggregation)

    # Create a description of the metadata document that describes the whole resource and add it
    # to the aggregation
    resMetaFile = AggregatedResource(metadata_url)
    resMetaFile._dc.title = "Dublin Core science metadata document describing the HydroShare " \
                            "resource"
    resMetaFile._citoterms.documents = ag_url
    resMetaFile._ore.isAggregatedBy = ag_url
    resMetaFile._dc.format = "application/rdf+xml"

    # Create a description of the content file and add it to the aggregation
    files = ResourceFile.objects.filter(object_id=resource.id)
    resFiles = []
    for n, f in enumerate(files):
        res_uri = '{hs_url}/resource/{res_id}/data/contents/{file_name}'.format(
            hs_url=site_url,
            res_id=resource.short_id,
            file_name=f.short_path)
        resFiles.append(AggregatedResource(res_uri))
        resFiles[n]._ore.isAggregatedBy = ag_url
        resFiles[n]._dc.format = get_file_mime_type(
            os.path.basename(f.short_path))

    # Add the resource files to the aggregation
    a.add_resource(resMetaFile)
    for f in resFiles:
        a.add_resource(f)

    # handle collection resource type
    # save contained resource urls into resourcemap.xml
    if resource.resource_type == "CollectionResource" and resource.resources:
        for contained_res in resource.resources.all():
            contained_res_id = contained_res.short_id
            resource_map_url = '{hs_url}/resource/{res_id}/data/resourcemap.xml'.format(
                hs_url=site_url, res_id=contained_res_id)

            ar = AggregatedResource(resource_map_url)
            ar._ore.isAggregatedBy = ag_url
            ar._dc.format = "application/rdf+xml"
            a.add_resource(ar)

    # Register a serializer with the aggregation, which creates a new ResourceMap that needs a URI
    serializer = RdfLibSerializer('xml')
    resMap = a.register_serialization(serializer, res_map_url)
    resMap._dc.identifier = resource.short_id

    # Fetch the serialization
    remdoc = a.get_serialization()

    # change the namespace for the 'creator' element from 'dcterms' to 'dc'
    xml_string = remdoc.data.replace('dcterms:creator', 'dc:creator')

    # delete this extra element
    # <ore:aggregates rdf:resource="[hydroshare domain]/terms/[Resource class name]"/>
    xml_string = xml_string.replace(
        '<ore:aggregates rdf:resource="%s"/>\n' %
        str(resource.metadata.type.url), '')

    # create resourcemap.xml and upload it to iRODS
    from_file_name = os.path.join(temp_path, 'resourcemap.xml')
    with open(from_file_name, 'w') as out:
        out.write(xml_string)
    to_file_name = os.path.join(resource.root_path, 'data', 'resourcemap.xml')
    istorage.saveFile(from_file_name, to_file_name, False)

    # mark the iRODS collection's metadata as clean and drop the local scratch dir
    res_coll = resource.root_path
    istorage.setAVU(res_coll, 'metadata_dirty', "false")
    shutil.rmtree(temp_path)
    return istorage
Example #7
0
 def test_get_current_site_url(self):
     """current_site_url() should build '<protocol>://<domain>' from the
     configured protocol and the current Django site."""
     expected_protocol = getattr(settings, 'MY_SITE_PROTOCOL', 'http')
     expected_domain = Site.objects.get_current().domain
     self.assertEquals(utils.current_site_url(),
                       '%s://%s' % (expected_protocol, expected_domain))
Example #8
0
 def modelProgramIdentifier(self):
     """URL identifying the linked model program, or "None" when unset."""
     program = self.model_program_fk
     if program:
         return '%s%s' % (utils.current_site_url(), program.get_absolute_url())
     return "None"
Example #9
0
import os
import tempfile
import csv
import shutil
import logging

from django.core.files.uploadedfile import UploadedFile

from hs_core.hydroshare.utils import resource_modified, current_site_url
from hs_core.hydroshare.resource import delete_resource_file_only, add_resource_files

# Module-level logger for this collection-utility module.
logger = logging.getLogger(__name__)
# Landing-page URL for a resource; format with the resource short id.
RES_LANDING_PAGE_URL_TEMPLATE = current_site_url() + "/resource/{0}/"
# File name for the generated CSV listing; format with an id.
CSV_FULL_NAME_TEMPLATE = "collection_list_{0}.csv"
# Marker string for resources that no longer exist — presumably written into
# the generated CSV listing; confirm against the callers of this module.
DELETED_RES_STRING = "Resource Deleted"


def add_or_remove_relation_metadata(add=True, target_res_obj=None, relation_type="",
                                    relation_value="", set_res_modified=False,
                                    last_change_user=None):
    """
    add new or remove relation metadata to/from target res obj
    :param add: True -- add metadata; False -- remove metadata
    :param target_res_obj: the target res obj to receive the change
    :param relation_type: "hasPart" or "isPartOf"
    :param relation_value: value of relation
    :param set_res_modified: set bag modified flag to True or False
    :param last_change_user: the User obj represents the last_change_by user
            (only works when set_res_modified is True)
    :return:
    """
    # NOTE(review): the implementation body is not visible in this chunk —
    # it appears to have been truncated; restore it from the original module.
Example #10
0
 def test_get_current_site_url(self):
     """Verify utils.current_site_url() returns protocol://domain."""
     site = Site.objects.get_current()
     scheme = getattr(settings, 'MY_SITE_PROTOCOL', 'http')
     self.assertEquals(utils.current_site_url(), scheme + '://' + site.domain)