def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    """
    Import one resource from the string form of its XML tree and store it
    with the given target publication status.

    xml_string: the XML representation of the resource
    targetstatus: the publication status to set on the storage object
    copy_status: passed on to the model's import_from_string()
    owner_id (optional): id of the user who becomes an owner of the resource;
      the default editor groups of that user's profile are added to the
      resource and an ADDITION log entry is written for the user

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model

    import_result = resourceInfoType_model.import_from_string(
        xml_string, copy_status=copy_status)
    if not import_result[0]:
        # the third element of the result, if present, is the error message
        raise Exception(u'{}'.format(import_result[2])
                        if len(import_result) > 2 else u'')
    resource = import_result[0]

    # Apply the requested publication status and clear the deletion flag,
    # which may still be set when a previously deleted resource is
    # re-imported.
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False

    if not owner_id:
        resource.storage_object.save()
    else:
        resource.owners.add(owner_id)
        owner_profile = User.objects.get(id=owner_id).userprofile
        for editor_group in owner_profile.default_editor_groups.all():
            resource.editor_groups.add(editor_group)
        # saving the resource also takes care of saving the storage_object
        resource.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # An ADDITION log message is only created when there is a user to
    # attribute it to.
    if owner_id:
        resource_type_id = ContentType.objects.get_for_model(resource).pk
        LogEntry.objects.log_action(
            user_id=owner_id,
            content_type_id=resource_type_id,
            object_id=resource.pk,
            object_repr=force_unicode(resource),
            action_flag=ADDITION)

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)
    return resource
def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    """
    Create a resource from the given XML string and save it with the given
    target publication status.

    The copy status is handed to the model's import_from_string(). When an
    owner id is given, that user is added as owner, the default editor groups
    from the user's profile are added to the resource, and an ADDITION log
    entry is recorded for the user.

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model

    outcome = resourceInfoType_model.import_from_string(
        xml_string, copy_status=copy_status)
    if not outcome[0]:
        # without a third result element there is no error text to report
        if len(outcome) <= 2:
            raise Exception(u'')
        raise Exception(u'{}'.format(outcome[2]))
    resource = outcome[0]

    # Set publication_status for the new object. The deletion flag is reset
    # as well, since it may be set when re-importing a previously deleted
    # resource.
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False

    if owner_id:
        resource.owners.add(owner_id)
        profile = User.objects.get(id=owner_id).get_profile()
        default_groups = profile.default_editor_groups.all()
        for group in default_groups:
            resource.editor_groups.add(group)
        # this also takes care of saving the storage_object
        resource.save()
    else:
        resource.storage_object.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # Create the ADDITION log message, but only if we have a user.
    if owner_id:
        content_type = ContentType.objects.get_for_model(resource)
        LogEntry.objects.log_action(
            user_id=owner_id,
            content_type_id=content_type.pk,
            object_id=resource.pk,
            object_repr=force_unicode(resource),
            action_flag=ADDITION
        )

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)

    return resource
def import_from_string(xml_string, targetstatus, owner_id=None):
    """
    Import a single resource from the string representation of its XML tree
    and save it with the given target publication status.

    If an owner id is given, it is added to the owners of the resource and an
    ADDITION log entry is written for that user.

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model

    imported = resourceInfoType_model.import_from_string(xml_string)
    if not imported[0]:
        # use the third result element as error message when it exists
        has_message = len(imported) > 2
        raise Exception(u'{}'.format(imported[2]) if has_message else u'')
    resource = imported[0]

    # Set publication_status for the new object and persist it.
    resource.storage_object.publication_status = targetstatus
    if owner_id:
        resource.owners.add(owner_id)
    resource.storage_object.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # The ADDITION log message needs a user, so skip it without an owner.
    if owner_id:
        resource_type_id = ContentType.objects.get_for_model(resource).pk
        LogEntry.objects.log_action(
            user_id=owner_id,
            content_type_id=resource_type_id,
            object_id=resource.pk,
            object_repr=force_unicode(resource),
            action_flag=ADDITION
        )

    # Update statistics
    saveLRStats(resource, "", "", UPDATE_STAT)

    return resource
def import_resources(import_folder):
    """
    Imports resources from the given folder.

    Every sub-folder of <import_folder>/STORAGE_FOLDER is processed on its
    own: the serialized storage object (STORAGE), the serialized resource
    object (RESOURCE) and the metadata XML (METADATA) are read, the resource
    is imported from the metadata XML with copy status MASTER, updated via
    _update_resource() and at most one binary archive is copied into the
    storage folder of the imported resource.

    A failure in one sub-folder does not stop the import; it is collected and
    reported in the summary printed at the end. Finally OBJECT_XML_CACHE is
    cleared and the search index is rebuilt.

    Side effects: sets settings.DEBUG to False and sets the environment
    variable DISABLE_INDEXING_DURING_IMPORT.

    Raises whatever verify_at_startup() raises.
    """
    # Check that SOLR is running, or else all resources will stay at status
    # INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        try:
            print("importing from folder: '{0}'".format(folder_name))

            # import storage object; reset the variable first so that an
            # empty file can never silently reuse the object of the
            # previously processed folder
            storage_obj = None
            so_filename = os.path.join(folder_path, STORAGE)
            with open(so_filename, "rb") as so_in:
                for obj in serializers.deserialize("xml", so_in):
                    print("importing storage object")
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
            if storage_obj is None:
                raise Exception(
                    "no storage object found in '{}'".format(so_filename))

            # import resource object
            res_obj = None
            ro_filename = os.path.join(folder_path, RESOURCE)
            with open(ro_filename, "rb") as ro_in:
                for obj in serializers.deserialize("xml", ro_in):
                    print("importing resource object")
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany
                    # attributes in m2m_data
            if res_obj is None:
                raise Exception(
                    "no resource object found in '{}'".format(ro_filename))

            # import resource from metadata.xml
            res_filename = os.path.join(folder_path, METADATA)
            with open(res_filename, 'rb') as metadata_in:
                xml_string = metadata_in.read()
            result = resourceInfoType_model.import_from_string(
                xml_string, copy_status=MASTER)
            if not result[0]:
                msg = u''
                if len(result) > 2:
                    msg = u'{}'.format(result[2])
                raise Exception(msg)
            res = result[0]
            # update imported resource with imported resource object
            # and storage object
            _update_resource(res, res_obj, storage_obj)
            # copy possible binaries archives
            for archive_name in [ARCHIVE_TPL.format(_ext)
                                 for _ext in ALLOWED_ARCHIVE_EXTENSIONS]:
                archive_filename = os.path.join(folder_path, archive_name)
                if os.path.isfile(archive_filename):
                    print("copying archive")
                    res_storage_path = '{0}/{1}/'.format(
                        settings.STORAGE_PATH,
                        res.storage_object.identifier)
                    shutil.copy(
                        archive_filename,
                        os.path.join(res_storage_path, archive_name))
                    # there can be at most one binary
                    break
            imported_resources.append(res)
        except Exception as problem:
            from django import db
            if isinstance(problem, db.utils.DatabaseError):
                # reset database connection (required for PostgreSQL)
                db.close_connection()
            erroneous_descriptors.append((folder_name, problem))

    print("Done.  Successfully imported {0} resources into the database, "
          "errors occurred in {1} cases.".format(
              len(imported_resources), len(erroneous_descriptors)))
    if len(erroneous_descriptors) > 0:
        print("The following resources could not be imported:")
        for descriptor, exception in erroneous_descriptors:
            print("\t{}: {}".format(descriptor, exception))

    # Be nice and cleanup cache...
    _cache_size = sum(len(x) for x in OBJECT_XML_CACHE.values())
    OBJECT_XML_CACHE.clear()
    print("Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size))

    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
def restore_from_folder(storage_id, copy_status=MASTER,
                        storage_digest=None, source_node=None,
                        force_digest=False):
    """
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database.

    storage_id: the storage object identifier; it is assumed that this is the
      folder name in the storage folder where serialized storage object and
      metadata XML are located

    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
      copy status for the restored resource

    storage_digest (optional): the digest_checksum to set in the restored
      storage object

    source_node (optional): the source node id to set in the restored
      storage object

    force_digest (optional): if True, always recreate the digest zip-archive

    Returns the restored resource with its storage object set.

    Raises an Exception if the folder contains no metadata file or if the
    resource cannot be imported from the metadata XML.
    """
    from metashare.repository.models import resourceInfoType_model

    # if a storage object with this id already exists, delete it
    try:
        StorageObject.objects.get(identifier=storage_id).delete()
    except ObjectDoesNotExist:
        pass

    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml: the file name that sorts last among all
    # names starting with 'metadata'
    _metadata_files = sorted(
        [f for f in os.listdir(storage_folder) if f.startswith('metadata')],
        reverse=True)
    if not _metadata_files:
        raise Exception('no metadata.xml found')

    # restore resource from metadata.xml; the context manager closes the
    # file even if reading fails
    _metadata_path = '{0}/{1}'.format(storage_folder, _metadata_files[0])
    with open(_metadata_path, 'rb') as _metadata_file:
        _xml_string = _metadata_file.read()
    result = resourceInfoType_model.import_from_string(
        _xml_string, copy_status=copy_status)
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so
    # update it
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string

    # add global storage object attributes if available
    _global_path = '{0}/storage-global.json'.format(storage_folder)
    if os.path.isfile(_global_path):
        _storage_object.global_storage = \
            _fill_storage_object(_storage_object, _global_path)
    else:
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id

    # add local storage object attributes if available
    _local_path = '{0}/storage-local.json'.format(storage_folder)
    if os.path.isfile(_local_path):
        _storage_object.local_storage = \
            _fill_storage_object(_storage_object, _local_path)
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                LOGGER.warn(
                    'overwriting copy status from storage-local.json with "{}"'
                    .format(copy_status))
            _storage_object.copy_status = copy_status
    elif copy_status:
        _storage_object.copy_status = copy_status
    else:
        # no copy status and no local storage object is provided, so use
        # a default
        LOGGER.warn(
            'no copy status provided, using default copy status MASTER')
        _storage_object.copy_status = MASTER

    # set storage digest if provided (usually for non-local resources)
    if storage_digest:
        _storage_object.digest_checksum = storage_digest
    # set source node id if provided (usually for non-local resources)
    if source_node:
        _storage_object.source_node = source_node

    # update_storage includes saving, so no separate save() is needed
    _storage_object.update_storage(force_digest=force_digest)

    return resource
def restore_from_folder(storage_id, copy_status=None):
    """
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database.

    storage_id: the storage object identifier; it is assumed that this is the
      folder name in the storage folder where serialized storage object and
      metadata XML are located

    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
      copy status for the restored resource

    Returns the restored resource with its storage object set.

    Raises an Exception if the folder contains no metadata file or if the
    resource cannot be imported from the metadata XML.
    """
    from metashare.repository.models import resourceInfoType_model

    # if a storage object with this id already exists, delete it
    try:
        StorageObject.objects.get(identifier=storage_id).delete()
    except ObjectDoesNotExist:
        pass

    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml: the file name that sorts last among all
    # names starting with 'metadata'
    _metadata_files = sorted(
        [f for f in os.listdir(storage_folder) if f.startswith('metadata')],
        reverse=True)
    if not _metadata_files:
        raise Exception('no metadata.xml found')

    # restore resource from metadata.xml; the context manager closes the
    # file even if reading fails
    _metadata_path = '{0}/{1}'.format(storage_folder, _metadata_files[0])
    with open(_metadata_path, 'rb') as _metadata_file:
        _xml_string = _metadata_file.read()
    result = resourceInfoType_model.import_from_string(_xml_string)
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so
    # update it
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string

    # add global storage object attributes if available
    _global_path = '{0}/storage-global.json'.format(storage_folder)
    if os.path.isfile(_global_path):
        _storage_object.global_storage = \
            _fill_storage_object(_storage_object, _global_path)
    else:
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id

    # add local storage object attributes if available
    _local_path = '{0}/storage-local.json'.format(storage_folder)
    if os.path.isfile(_local_path):
        _storage_object.local_storage = \
            _fill_storage_object(_storage_object, _local_path)
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                LOGGER.warn(
                    'overwriting copy status from storage-local.json with "{}"'
                    .format(copy_status))
            _storage_object.copy_status = copy_status
    elif copy_status:
        _storage_object.copy_status = copy_status
    else:
        # no copy status and no local storage object is provided, so use
        # a default
        LOGGER.warn(
            'no copy status provided, using default copy status MASTER')
        _storage_object.copy_status = MASTER

    _storage_object.save()
    _storage_object.update_storage()

    return resource
def import_resources(import_folder):
    """
    Imports resources from the given folder.

    For each directory below <import_folder>/STORAGE_FOLDER this reads the
    serialized storage object (STORAGE), the serialized resource object
    (RESOURCE) and the metadata XML (METADATA), imports the resource from
    the metadata XML with copy status MASTER, applies _update_resource() to
    it and copies at most one binary archive into the storage folder of the
    imported resource.

    Errors are collected per directory and listed in the summary that is
    printed once all directories have been processed. Afterwards
    OBJECT_XML_CACHE is cleared and the search index is rebuilt.

    Side effects: sets settings.DEBUG to False and sets the environment
    variable DISABLE_INDEXING_DURING_IMPORT.

    Raises whatever verify_at_startup() raises.
    """
    # Check that SOLR is running, or else all resources will stay at status
    # INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if os.path.isdir(folder_path):
            try:
                print("importing from folder: '{0}'".format(folder_name))

                # Start every folder without leftovers from the previous
                # one, so that a file without any serialized object is
                # reported instead of reusing stale data.
                storage_obj = None
                res_obj = None

                # import storage object
                so_filename = os.path.join(folder_path, STORAGE)
                with open(so_filename, "rb") as so_in:
                    for obj in serializers.deserialize("xml", so_in):
                        print("importing storage object")
                        # storage.xml only contains a single storage object
                        storage_obj = obj.object
                        # this storage object is NOT saved!
                        # we only copy the relevant attributes from this
                        # storage object to the one at the resource!
                if storage_obj is None:
                    raise Exception(
                        "no storage object found in '{}'".format(so_filename))

                # import resource object
                ro_filename = os.path.join(folder_path, RESOURCE)
                with open(ro_filename, "rb") as ro_in:
                    for obj in serializers.deserialize("xml", ro_in):
                        print("importing resource object")
                        # resource.xml only contains a single resource object
                        res_obj = obj
                        # the deserialized object contains the ManyToMany
                        # attributes in m2m_data
                if res_obj is None:
                    raise Exception(
                        "no resource object found in '{}'".format(ro_filename))

                # import resource from metadata.xml; the file is closed as
                # soon as its content has been read
                res_filename = os.path.join(folder_path, METADATA)
                with open(res_filename, 'rb') as temp_file:
                    xml_string = temp_file.read()
                result = resourceInfoType_model.import_from_string(
                    xml_string, copy_status=MASTER)
                if not result[0]:
                    msg = u''
                    if len(result) > 2:
                        msg = u'{}'.format(result[2])
                    raise Exception(msg)
                res = result[0]
                # update imported resource with imported resource object
                # and storage object
                _update_resource(res, res_obj, storage_obj)
                # copy possible binaries archives
                for _ext in ALLOWED_ARCHIVE_EXTENSIONS:
                    archive_name = ARCHIVE_TPL.format(_ext)
                    archive_filename = os.path.join(folder_path, archive_name)
                    if os.path.isfile(archive_filename):
                        print("copying archive")
                        res_storage_path = '{0}/{1}/'.format(
                            settings.STORAGE_PATH,
                            res.storage_object.identifier)
                        shutil.copy(
                            archive_filename,
                            os.path.join(res_storage_path, archive_name))
                        # there can be at most one binary
                        break
                imported_resources.append(res)
            except Exception as problem:
                from django import db
                if isinstance(problem, db.utils.DatabaseError):
                    # reset database connection (required for PostgreSQL)
                    db.close_connection()
                erroneous_descriptors.append((folder_name, problem))

    print("Done.  Successfully imported {0} resources into the database, "
          "errors occurred in {1} cases.".format(
              len(imported_resources), len(erroneous_descriptors)))
    if len(erroneous_descriptors) > 0:
        print("The following resources could not be imported:")
        for descriptor, exception in erroneous_descriptors:
            print("\t{}: {}".format(descriptor, exception))

    # Be nice and cleanup cache...
    _cache_size = sum(len(x) for x in OBJECT_XML_CACHE.values())
    OBJECT_XML_CACHE.clear()
    print("Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size))

    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
def import_xml(filename, copy_status=MASTER):
    """
    Reads the XML file with the given name and imports a resource from its
    content.

    filename: path of the XML file to read

    copy_status (optional): passed on to
      resourceInfoType_model.import_from_string()

    Returns the unmodified result of
    resourceInfoType_model.import_from_string().
    """
    # the context manager closes the file even if reading fails
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return resourceInfoType_model.import_from_string(
        _xml_string, copy_status=copy_status)
def import_xml(filename):
    """
    Reads the XML file with the given name and imports a resource from its
    content.

    filename: path of the XML file to read

    Returns the unmodified result of
    resourceInfoType_model.import_from_string().
    """
    # the context manager closes the file even if reading fails
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return resourceInfoType_model.import_from_string(_xml_string)