def assert_import_equals_export(self, _roundtrip):
     """Import *_roundtrip* and assert the re-exported XML matches the input.

     The input file is re-serialized through the same writer used for the
     export so the comparison is insensitive to formatting differences.
     """
     _result = test_utils.import_xml(_roundtrip)
     with open(_roundtrip) as _import_file:
         _raw_xml = _import_file.read()
         register_namespace("", SCHEMA_NAMESPACE)
         _import_xml = to_xml_string(fromstring(_raw_xml), encoding="utf-8")
     _export_xml = to_xml_string(_result.export_to_elementtree(), encoding="utf-8")
     # cfedermann: uncomment these lines to dump import/export XML to file.
     #
     # with open('/tmp/_import.xml', 'wb') as _out:
     #    _out.write(_import_xml.encode('utf-8'))
     # with open('/tmp/_export.xml', 'wb') as _out:
     #    _out.write(_export_xml.encode('utf-8'))
     # unified diff of the two serializations, shown only on failure
     _diff_source = unified_diff(_import_xml.split("\n"), _export_xml.split("\n"))
     diff = "\n".join(_diff_source)
     self.assertEqual(
         _import_xml, _export_xml, msg="For file {0}, export differs from import:\n{1}".format(_roundtrip, diff)
     )
Exemple #2
0
    def check_metadata(self):
        """
        Checks if the metadata of the resource has changed with respect to the
        current metadata serialization. If yes, recreates the serialization,
        updates it in the storage folder and increases the revision (for master
        copies)

        Returns a flag indicating if the serialization was updated.
        """

        # flag to indicate if rebuilding of metadata.xml is required
        update_xml = False

        # create current version of metadata XML
        from metashare.xml_utils import to_xml_string
        try:
            _metadata = to_xml_string(
                # pylint: disable-msg=E1101
                self.resourceinfotype_model_set.all()
                [0].export_to_elementtree(),
                # use ASCII encoding to convert non-ASCII chars to entities
                encoding="ASCII")
        except:
            # log enough context to identify the broken resource, then let the
            # original exception propagate to the caller unchanged
            # pylint: disable-msg=E1101
            LOGGER.error('PROBLEMATIC: %s - count: %s',
                         self.identifier,
                         self.resourceinfotype_model_set.count(),
                         exc_info=True)
            raise

        if self.metadata != _metadata:
            # cached serialization is stale: replace it and record when
            self.metadata = _metadata
            LOGGER.debug(u"\nMETADATA: {0}\n".format(self.metadata))
            self.modified = datetime.now()
            update_xml = True
            # increase revision for ingested and published resources whenever
            # the metadata XML changes for master copies
            if self.publication_status in (INGESTED, PUBLISHED) \
                    and self.copy_status == MASTER:
                self.revision += 1

        # check if there exists a metadata XML file; this is not the case if
        # the publication status just changed from internal to ingested
        # or if the resource was received when syncing
        if self.publication_status in (INGESTED, PUBLISHED) \
                and not os.path.isfile(
                    '{0}/metadata-{1:04d}.xml'.format(self._storage_folder(), self.revision)):
            update_xml = True

        if update_xml:
            # serialize metadata (path uses the possibly-bumped revision)
            with open(
                    '{0}/metadata-{1:04d}.xml'.format(self._storage_folder(),
                                                      self.revision),
                    'wb') as _out:
                _out.write(unicode(self.metadata).encode('ASCII'))

        return update_xml
Exemple #3
0
def xml_to_json(obj):
    """Serialize *obj* to an indented JSON byte string via its XML export.

    The element names in the force-list below always deserialize as lists,
    even when the XML contains only a single occurrence.
    """
    list_fields = ['distributionInfo', 'licenceInfo', 'corpusTextInfo',
                   'distributionMedium', 'downloadLocation', 'executionLocation',
                   'attributionText', 'iprHolder', 'contactPerson', 'surname']

    # get xml representation as UTF-8 bytes
    root_node = obj.export_to_elementtree()
    xml_string = to_xml_string(root_node, encoding="utf-8").encode("utf-8")

    # parse xml to dict, then dump as pretty-printed JSON bytes
    parsed = xmltodict.parse(xml_string, force_list=list_fields)
    return json.dumps(parsed, indent=4).encode('utf8')
Exemple #4
0
    def check_metadata(self):
        """Bring the metadata serialization in sync with the resource.

        Compares the stored metadata XML against a freshly generated
        serialization; on a mismatch the cached copy is replaced, the
        modification timestamp refreshed and - for master copies of ingested
        or published resources - the revision incremented.  The XML file in
        the storage folder is (re)written whenever it is stale or missing.

        Returns a flag indicating if the serialization was updated.
        """
        from metashare.xml_utils import to_xml_string

        # Generate the up-to-date serialization for this resource.
        try:
            _metadata = to_xml_string(
                # pylint: disable-msg=E1101
                self.resourceinfotype_model_set.all()[0].export_to_elementtree(),
                # use ASCII encoding to convert non-ASCII chars to entities
                encoding="ASCII",
            )
        except:
            # Log enough context to identify the resource, then re-raise.
            # pylint: disable-msg=E1101
            LOGGER.error(
                "PROBLEMATIC: %s - count: %s", self.identifier, self.resourceinfotype_model_set.count(), exc_info=True
            )
            raise

        # flag to indicate if rebuilding of metadata.xml is required
        rebuild_required = False

        if self.metadata != _metadata:
            # The cached serialization is stale - replace it.
            self.metadata = _metadata
            LOGGER.debug(u"\nMETADATA: {0}\n".format(self.metadata))
            self.modified = datetime.now()
            rebuild_required = True
            # Master copies of ingested/published resources get a new revision
            # whenever their metadata XML changes.
            if self.publication_status in (INGESTED, PUBLISHED) and self.copy_status == MASTER:
                self.revision += 1

        # The XML file may also be missing altogether; this happens when the
        # publication status just changed from internal to ingested or when
        # the resource was received while syncing.  Note: the path uses the
        # possibly-bumped revision from above.
        _xml_path = "{0}/metadata-{1:04d}.xml".format(self._storage_folder(), self.revision)
        if self.publication_status in (INGESTED, PUBLISHED) and not os.path.isfile(_xml_path):
            rebuild_required = True

        if rebuild_required:
            # serialize metadata
            with open(_xml_path, "wb") as _out:
                _out.write(unicode(self.metadata).encode("ASCII"))

        return rebuild_required
Exemple #5
0
 def assert_import_equals_export(self, _roundtrip):
     """Assert that importing *_roundtrip* and re-exporting it produces XML
     identical to the (re-serialized) input file."""
     _result = test_utils.import_xml(_roundtrip)
     with open(_roundtrip) as _import_file:
         _import_xml = _import_file.read()
         # re-serialize the input through the same writer used for the export
         # so the comparison is insensitive to formatting differences
         register_namespace('', SCHEMA_NAMESPACE)
         _import_xml = to_xml_string(fromstring(_import_xml),
                                     encoding="utf-8")
     _export_xml = to_xml_string(_result.export_to_elementtree(),
                                 encoding="utf-8")
     # cfedermann: uncomment these lines to dump import/export XML to file.
     #
     #with open('/tmp/_import.xml', 'wb') as _out:
     #    _out.write(_import_xml.encode('utf-8'))
     #with open('/tmp/_export.xml', 'wb') as _out:
     #    _out.write(_export_xml.encode('utf-8'))
     # unified diff of the two serializations, used only in the failure message
     diff = '\n'.join(
         unified_diff(_import_xml.split('\n'), _export_xml.split('\n')))
     self.assertEqual(
         _import_xml,
         _export_xml,
         msg='For file {0}, export differs from import:\n{1}'.format(
             _roundtrip, diff.encode('utf-8')))
Exemple #6
0
def extract_source_resource_metadata(res_id):
    res = resourceInfoType_model.objects.get(id=res_id)
    res_owners = res.owners.all()
    view_path = res.get_absolute_url()
    try:
        root_node = res.export_to_elementtree()
        xml_string = to_xml_string(root_node, encoding="utf-8").encode('utf-8')
        return {
            "resource": res,
            "uri": "{}{}".format(DJANGO_URL, view_path),
            "owners": res_owners,
            "metadata": xml_string,
        }

    except:
        print "Could not import resource with id {}: \"{}\"".format(
            res_id, res)
Exemple #7
0
    
    # NOTE(review): this comment said "Disable" but DEBUG is set to True
    # below -- confirm whether verbose debug output is intended here.
    settings.DEBUG = True
    
    # counters for the summary printed after the export loop
    SUCCESSFUL_EXPORTS = 0
    ERRONEOUS_EXPORTS = 0
    RESOURCE_NO = 0
    from metashare.repository.models import resourceInfoType_model
    from metashare.xml_utils import to_xml_string
    # write every exportable resource as resource-<N>.xml into the zip file
    # named by the first command line argument
    with ZipFile(sys.argv[1], 'w') as out:
        for resource in resourceInfoType_model.objects.all():
            # skip resources marked as deleted
            if resource.storage_object.deleted == True:
                continue
            try:
                RESOURCE_NO += 1
                root_node = resource.export_to_elementtree()
                xml_string = to_xml_string(
                  root_node, encoding="utf-8").encode('utf-8')
                resource_filename = 'resource-{0}.xml'.format(RESOURCE_NO)
                out.writestr(resource_filename, xml_string)
                SUCCESSFUL_EXPORTS += 1
            
            except Exception:
                # keep going on a per-resource failure; report and count it
                ERRONEOUS_EXPORTS += 1
                print 'Could not export resource id={0}!'.format(resource.id)
                print traceback.format_exc()
    
    print "Done. Successfully exported {0} files from the database, errors " \
      "occured in {1} cases.".format(SUCCESSFUL_EXPORTS, ERRONEOUS_EXPORTS)
Exemple #8
0
    def update_storage(self):
        """
        Updates the metadata XML if required and serializes it and this storage
        object to the storage folder.

        Side effects: may create the storage folder, rewrite
        metadata-<revision>.xml, storage-global.json, storage-local.json and
        resource.zip, bump self.revision and the digest fields; always saves
        this storage object since self.digest_last_checked changes each call.
        """
        # for internal resources, no serialization is done
        if self.publication_status is INTERNAL:
            return
        
        # check if the storage folder for this storage object instance exists
        if self._storage_folder() and not exists(self._storage_folder()):
            # If not, create the storage folder.
            mkdir(self._storage_folder())
        
        # update the checksum, if a downloadable file exists
        if self.master_copy:
            self._compute_checksum()
        
        self.digest_last_checked = datetime.now()

        # flag to indicate if rebuilding of metadata.xml is required
        update_xml = False
        
        # create current version of metadata XML
        from metashare.xml_utils import to_xml_string
        _metadata = to_xml_string(
          # pylint: disable-msg=E1101
          self.resourceinfotype_model_set.all()[0].export_to_elementtree(),
          # use ASCII encoding to convert non-ASCII chars to entities
          encoding="ASCII")
        
        if self.metadata != _metadata:
            # cached serialization is stale: replace it and record when
            self.metadata = _metadata
            self.modified = datetime.now()
            # increase revision for ingested and published resources whenever
            # the metadata XML changes
            if self.publication_status in (INGESTED, PUBLISHED):
                self.revision += 1
                update_xml = True
            LOGGER.debug(u"\nMETADATA: {0}\n".format(self.metadata))
            
        # check if there exists a metadata XML file; this is not the case if
        # the publication status just changed from internal to ingested
        # or if the resource was received when syncing (path uses the
        # possibly-bumped revision from above)
        if self.publication_status in (INGESTED, PUBLISHED) \
          and not os.path.isfile(
          '{0}/metadata-{1:04d}.xml'.format(self._storage_folder(), self.revision)):
            update_xml = True

        # flag to indicate if rebuilding of resource.zip is required
        update_zip = False
          
        if update_xml:
            # serialize metadata
            with open('{0}/metadata-{1:04d}.xml'.format(
              self._storage_folder(), self.revision), 'wb') as _out:
                _out.write(unicode(self.metadata).encode('ASCII'))
            update_zip = True
        
        # check if global storage object serialization has changed; if yes,
        # save it to storage folder
        _dict_global = { }
        for item in GLOBAL_STORAGE_ATTS:
            _dict_global[item] = getattr(self, item)
        # compact, key-sorted JSON so the string comparison is deterministic
        _global_storage = \
          dumps(_dict_global, cls=DjangoJSONEncoder, sort_keys=True, separators=(',',':'))
        if self.global_storage != _global_storage:
            self.global_storage = _global_storage
            if self.publication_status in (INGESTED, PUBLISHED):
                with open('{0}/storage-global.json'.format(
                  self._storage_folder()), 'wb') as _out:
                    _out.write(unicode(self.global_storage).encode('utf-8'))
                update_zip = True
        
        # create new digest zip if required, but only for master and proxy copies
        if update_zip and self.copy_status in (MASTER, PROXY):
            _zf_name = '{0}/resource.zip'.format(self._storage_folder())
            _zf = zipfile.ZipFile(_zf_name, mode='w', compression=ZIP_DEFLATED)
            try:
                # bundle the current metadata XML plus the global storage JSON
                _zf.write(
                  '{0}/metadata-{1:04d}.xml'.format(self._storage_folder(), self.revision),
                  arcname='metadata.xml')
                _zf.write(
                  '{0}/storage-global.json'.format(self._storage_folder()),
                  arcname='storage-global.json')
            finally:
                _zf.close()
            # update zip digest checksum
            self.digest_checksum = \
              compute_digest_checksum(self.metadata, self.global_storage)
            # update last modified timestamp
            self.digest_modified = datetime.now()
            
        # check if local storage object serialization has changed; if yes,
        # save it to storage folder (not part of the digest zip)
        _dict_local = { }
        for item in LOCAL_STORAGE_ATTS:
            _dict_local[item] = getattr(self, item)
        _local_storage = \
          dumps(_dict_local, cls=DjangoJSONEncoder, sort_keys=True, separators=(',',':'))
        if self.local_storage != _local_storage:
            self.local_storage = _local_storage
            if self.publication_status in (INGESTED, PUBLISHED):
                with open('{0}/storage-local.json'.format(
                  self._storage_folder()), 'wb') as _out:
                    _out.write(unicode(self.local_storage).encode('utf-8'))
        
        # save storage object if required; this is always required since at
        # least self.digest_last_checked has changed
        self.save()