Example #1
0
def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    """
    Import a single resource from the string serialization of its XML tree
    and persist it with the given target publication status.

    Returns the imported resource object on success; raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model
    import_result = resourceInfoType_model.import_from_string(
        xml_string, copy_status=copy_status)

    resource = import_result[0]
    if not resource:
        # element 2 of the result tuple, when present, carries the error text
        error_msg = u''
        if len(import_result) > 2:
            error_msg = u'{}'.format(import_result[2])
        raise Exception(error_msg)

    # Assign the requested publication status and clear the deletion flag,
    # which may still be set when re-importing a previously deleted resource.
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False
    if owner_id:
        resource.owners.add(owner_id)
        default_groups = User.objects.get(id=owner_id).userprofile \
            .default_editor_groups.all()
        for group in default_groups:
            resource.editor_groups.add(group)
        # saving the resource also saves its storage_object
        resource.save()
    else:
        resource.storage_object.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # Log an ADDITION action for the new object, but only on behalf of a user:
    if owner_id:
        LogEntry.objects.log_action(
            user_id=owner_id,
            content_type_id=ContentType.objects.get_for_model(resource).pk,
            object_id=resource.pk,
            object_repr=force_unicode(resource),
            action_flag=ADDITION)

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)

    return resource
Example #2
0
def import_from_string(xml_string, targetstatus, copy_status, owner_id=None):
    """
    Import a single resource from a string representation of its XML tree,
    and save it with the given target status.

    xml_string: string serialization of the resource metadata XML
    targetstatus: publication status to set on the imported resource
    copy_status: copy status forwarded to the model-level import
    owner_id (optional): pk of the user to register as owner of the resource

    Returns the imported resource object on success, raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model
    result = resourceInfoType_model.import_from_string(xml_string, copy_status=copy_status)
    
    # The import returns a tuple: element 0 is the resource (falsy on
    # failure); element 2, when present, holds an error description.
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    
    resource = result[0]
    
    # Set publication_status for the new object. Also make sure that the
    # deletion flag is not set (may happen in case of re-importing a previously
    # deleted resource).
    resource.storage_object.publication_status = targetstatus
    resource.storage_object.deleted = False
    if owner_id:
        resource.owners.add(owner_id)
        # NOTE(review): User.get_profile() was deprecated in Django 1.5 and
        # removed in 1.7 -- confirm the Django version this runs against.
        for edt_grp in User.objects.get(id=owner_id).get_profile() \
                .default_editor_groups.all():
            resource.editor_groups.add(edt_grp)
        # this also takes care of saving the storage_object
        resource.save()
    else:
        resource.storage_object.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # Create log ADDITION message for the new object, but only if we have a user:
    if owner_id:
        LogEntry.objects.log_action(
            user_id         = owner_id,
            content_type_id = ContentType.objects.get_for_model(resource).pk,
            object_id       = resource.pk,
            object_repr     = force_unicode(resource),
            action_flag     = ADDITION
        )

    # Update statistics
    saveLRStats(resource, UPDATE_STAT)

    return resource
Example #3
0
def import_from_string(xml_string, targetstatus, owner_id=None):
    """
    Import one resource from the string serialization of its XML tree and
    store it with the given target status.

    Returns the imported resource object on success; raises an Exception on
    failure.
    """
    from metashare.repository.models import resourceInfoType_model
    outcome = resourceInfoType_model.import_from_string(xml_string)

    resource = outcome[0]
    if not resource:
        # element 2 of the result tuple, when present, carries the error text
        failure_msg = u''
        if len(outcome) > 2:
            failure_msg = u'{}'.format(outcome[2])
        raise Exception(failure_msg)

    # Apply the requested publication status and register the owner, if any.
    resource.storage_object.publication_status = targetstatus
    if owner_id:
        resource.owners.add(owner_id)

    resource.storage_object.save()

    # explicitly write metadata XML and storage object to the storage folder
    resource.storage_object.update_storage()

    # Log an ADDITION action for the new object, but only on behalf of a user:
    if owner_id:
        LogEntry.objects.log_action(
            user_id=owner_id,
            content_type_id=ContentType.objects.get_for_model(resource).pk,
            object_id=resource.pk,
            object_repr=force_unicode(resource),
            action_flag=ADDITION)

    # Update statistics
    saveLRStats(resource, "", "", UPDATE_STAT)

    return resource
Example #4
0
def import_resources(import_folder):
    """
    Imports resources from the given folder.
    """
    # Check that SOLR is running, or else all resources will stay at status INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup()  # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if os.path.isdir(folder_path):
            try:
                print "importing from folder: '{0}'".format(folder_name)
                # import storage object
                so_filename = os.path.join(folder_path, STORAGE)
                so_in = open(so_filename, "rb")
                for obj in serializers.deserialize("xml", so_in):
                    print "importing storage object"
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
                so_in.close()
                # import resource object
                ro_filename = os.path.join(folder_path, RESOURCE)
                ro_in = open(ro_filename, "rb")
                for obj in serializers.deserialize("xml", ro_in):
                    print "importing resource object"
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany attributes
                    # in m2m_data
                ro_in.close()
                # import resource from metadata.xml
                res_filename = os.path.join(folder_path, METADATA)
                temp_file = open(res_filename, 'rb')
                xml_string = temp_file.read()
                result = resourceInfoType_model.import_from_string(
                    xml_string, copy_status=MASTER)
                if not result[0]:
                    msg = u''
                    if len(result) > 2:
                        msg = u'{}'.format(result[2])
                    raise Exception(msg)
                res = result[0]
                # update imported resource with imported resource object
                # and storage object
                _update_resource(res, res_obj, storage_obj)
                # copy possible binaries archives
                for archive_name in [
                        ARCHIVE_TPL.format(_ext)
                        for _ext in ALLOWED_ARCHIVE_EXTENSIONS
                ]:
                    archive_filename = os.path.join(folder_path, archive_name)
                    if os.path.isfile(archive_filename):
                        print "copying archive"
                        res_storage_path = '{0}/{1}/'.format(
                            settings.STORAGE_PATH,
                            res.storage_object.identifier)
                        shutil.copy(
                            archive_filename,
                            os.path.join(res_storage_path, archive_name))
                        # there can be at most one binary
                        break
                imported_resources.append(res)
            except Exception as problem:
                from django import db
                if isinstance(problem, db.utils.DatabaseError):
                    # reset database connection (required for PostgreSQL)
                    db.close_connection()
                erroneous_descriptors.append((folder_name, problem))

    print "Done.  Successfully imported {0} resources into the database, " \
      "errors occurred in {1} cases.".format(
      len(imported_resources), len(erroneous_descriptors))
    if len(erroneous_descriptors) > 0:
        print "The following resources could not be imported:"
        for descriptor, exception in erroneous_descriptors:
            print "\t{}: {}".format(descriptor, exception)

    # Be nice and cleanup cache...
    _cache_size = sum([len(x) for x in OBJECT_XML_CACHE.values()])
    OBJECT_XML_CACHE.clear()
    print "Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size)

    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
Example #5
0
def restore_from_folder(storage_id, copy_status=MASTER,
                        storage_digest=None, source_node=None,
                        force_digest=False):
    """
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database.

    storage_id: the storage object identifier; it is assumed that this is the
        folder name in the storage folder where serialized storage object
        and metadata XML are located

    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
        copy status for the restored resource

    storage_digest (optional): the digest_checksum to set in the restored
        storage object

    source_node (optional): the source node id to set in the restored
        storage object

    force_digest (optional): if True, always recreate the digest zip-archive

    Returns the restored resource with its storage object set.
    """
    from metashare.repository.models import resourceInfoType_model

    # if a storage object with this id already exists, delete it
    try:
        _so = StorageObject.objects.get(identifier=storage_id)
        _so.delete()
    except ObjectDoesNotExist:
        _so = None

    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml: reverse lexicographic order puts the
    # presumably most recent serialization first -- TODO confirm the file
    # naming scheme guarantees this
    _files = os.listdir(storage_folder)
    _metadata_files = \
        sorted(
            [f for f in _files if f.startswith('metadata')],
            reverse=True)
    if not _metadata_files:
        raise Exception('no metadata.xml found')
    # restore resource from metadata.xml; the context manager closes the
    # file even when reading raises
    with open('{0}/{1}'.format(storage_folder, _metadata_files[0]),
              'rb') as _metadata_file:
        _xml_string = _metadata_file.read()
    result = resourceInfoType_model.import_from_string(_xml_string,
                                                       copy_status=copy_status)
    # element 0 of the result tuple is the resource (falsy on failure);
    # element 2, when present, holds an error description
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so update it
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string

    # add global storage object attributes if available
    if os.path.isfile('{0}/storage-global.json'.format(storage_folder)):
        _global_json = \
            _fill_storage_object(_storage_object, '{0}/storage-global.json'.format(storage_folder))
        _storage_object.global_storage = _global_json
    else:
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id

    # add local storage object attributes if available
    if os.path.isfile('{0}/storage-local.json'.format(storage_folder)):
        _local_json = \
            _fill_storage_object(_storage_object, '{0}/storage-local.json'.format(storage_folder))
        _storage_object.local_storage = _local_json
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                LOGGER.warn(
                    'overwriting copy status from storage-local.json with "{}"'
                    .format(copy_status))
            _storage_object.copy_status = copy_status
    else:
        if copy_status:
            _storage_object.copy_status = copy_status
        else:
            # no copy status and no local storage object is provided, so use
            # a default
            LOGGER.warn(
                'no copy status provided, using default copy status MASTER')
            _storage_object.copy_status = MASTER

    # set storage digest if provided (usually for non-local resources)
    if storage_digest:
        _storage_object.digest_checksum = storage_digest
    # set source node id if provided (usually for non-local resources)
    if source_node:
        _storage_object.source_node = source_node

    _storage_object.update_storage(force_digest=force_digest)
    # update_storage includes saving
    # _storage_object.save()

    return resource
Example #6
0
def restore_from_folder(storage_id, copy_status=None):
    """
    Restores the storage object and the associated resource for the given
    storage object identifier and makes it persistent in the database.

    storage_id: the storage object identifier; it is assumed that this is the
        folder name in the storage folder where serialized storage object
        and metadata XML are located

    copy_status (optional): one of MASTER, REMOTE, PROXY; if present, used as
        copy status for the restored resource

    Returns the restored resource with its storage object set.
    """
    from metashare.repository.models import resourceInfoType_model
    
    # if a storage object with this id already exists, delete it
    try:
        _so = StorageObject.objects.get(identifier=storage_id)
        _so.delete()
    except ObjectDoesNotExist:
        _so = None
    
    storage_folder = os.path.join(settings.STORAGE_PATH, storage_id)

    # get most current metadata.xml: reverse lexicographic order puts the
    # presumably most recent serialization first -- TODO confirm the file
    # naming scheme guarantees this
    _files = os.listdir(storage_folder)
    _metadata_files = \
      sorted(
        [f for f in _files if f.startswith('metadata')],
        reverse=True)
    if not _metadata_files:
        raise Exception('no metadata.xml found')
    # restore resource from metadata.xml
    _metadata_file = open('{0}/{1}'.format(storage_folder, _metadata_files[0]), 'rb')
    _xml_string = _metadata_file.read()
    _metadata_file.close()
    result = resourceInfoType_model.import_from_string(_xml_string)
    # element 0 of the result tuple is the resource (falsy on failure);
    # element 2, when present, holds an error description
    if not result[0]:
        msg = u''
        if len(result) > 2:
            msg = u'{}'.format(result[2])
        raise Exception(msg)
    resource = result[0]
    # at this point, a storage object is already created at the resource, so update it
    _storage_object = resource.storage_object
    _storage_object.metadata = _xml_string
    
    # add global storage object attributes if available
    if os.path.isfile('{0}/storage-global.json'.format(storage_folder)):
        _global_json = \
          _fill_storage_object(_storage_object, '{0}/storage-global.json'.format(storage_folder))
        _storage_object.global_storage = _global_json
    else:
        # without the global attributes the resource is treated as a new one
        LOGGER.warn('missing storage-global.json, importing resource as new')
        _storage_object.identifier = storage_id
    # add local storage object attributes if available
    if os.path.isfile('{0}/storage-local.json'.format(storage_folder)):
        _local_json = \
          _fill_storage_object(_storage_object, '{0}/storage-local.json'.format(storage_folder))
        _storage_object.local_storage = _local_json
        # always use the provided copy status, even if its different from the
        # one in the local storage object
        if copy_status:
            if _storage_object.copy_status != copy_status:
                LOGGER.warn('overwriting copy status from storage-local.json with "{}"'.format(copy_status))
            _storage_object.copy_status = copy_status
    else:
        if copy_status:
            _storage_object.copy_status = copy_status
        else:
            # no copy status and no local storage object is provided, so use
            # a default
            LOGGER.warn('no copy status provided, using default copy status MASTER')
            _storage_object.copy_status = MASTER

    # persist the updated storage object and write it out to the storage folder
    _storage_object.save()
    _storage_object.update_storage()
        
    return resource
def import_resources(import_folder):
    """
    Imports resources from the given folder.

    For every subfolder of the import folder's storage directory, this
    deserializes the dumped storage object and resource object, imports the
    resource from its metadata XML (with copy status MASTER), merges the
    dumped objects into it and copies along a binary archive if one exists.
    Failures are collected per folder and reported at the end; finally the
    search index is rebuilt.
    """
    # Check that SOLR is running, or else all resources will stay at status INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'
    
    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()
    
    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if os.path.isdir(folder_path):
            try:
                print "importing from folder: '{0}'".format(folder_name)
                # import storage object
                # NOTE(review): the file handles below are not closed when
                # deserialize()/read() raises -- the except clause records
                # the error but leaks the handle; consider `with open(...)`
                so_filename = os.path.join(folder_path, STORAGE)
                so_in = open(so_filename, "rb")
                for obj in serializers.deserialize("xml", so_in):
                    print "importing storage object"
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
                so_in.close()
                # import resource object
                ro_filename = os.path.join(folder_path, RESOURCE)
                ro_in = open(ro_filename, "rb")
                for obj in serializers.deserialize("xml", ro_in):
                    print "importing resource object"
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany attributes
                    # in m2m_data
                ro_in.close()
                # import resource from metadata.xml
                res_filename = os.path.join(folder_path, METADATA)
                temp_file = open(res_filename, 'rb')
                xml_string = temp_file.read()
                result = resourceInfoType_model.import_from_string(
                  xml_string, copy_status=MASTER)
                # element 0 of the result tuple is the resource (falsy on
                # failure); element 2, when present, holds an error description
                if not result[0]:
                    msg = u''
                    if len(result) > 2:
                        msg = u'{}'.format(result[2])
                    raise Exception(msg)
                res = result[0]
                # update imported resource with imported resource object 
                # and storage object
                _update_resource(res, res_obj, storage_obj)
                # copy possible binaries archives
                for archive_name in [ARCHIVE_TPL.format(_ext)
                                     for _ext in ALLOWED_ARCHIVE_EXTENSIONS]:
                    archive_filename = os.path.join(folder_path, archive_name)
                    if os.path.isfile(archive_filename):
                        print "copying archive"
                        res_storage_path = '{0}/{1}/'.format(
                          settings.STORAGE_PATH, res.storage_object.identifier)
                        shutil.copy(archive_filename,
                          os.path.join(res_storage_path, archive_name))
                        # there can be at most one binary
                        break
                imported_resources.append(res)
            except Exception as problem:
                from django import db
                if isinstance(problem, db.utils.DatabaseError):
                    # reset database connection (required for PostgreSQL)
                    db.close_connection()
                erroneous_descriptors.append((folder_name, problem))

    print "Done.  Successfully imported {0} resources into the database, " \
      "errors occurred in {1} cases.".format(
      len(imported_resources), len(erroneous_descriptors))
    if len(erroneous_descriptors) > 0:
        print "The following resources could not be imported:"
        for descriptor, exception in erroneous_descriptors:
            print "\t{}: {}".format(descriptor, exception)

    # Be nice and cleanup cache...
    _cache_size = sum([len(x) for x in OBJECT_XML_CACHE.values()])
    OBJECT_XML_CACHE.clear()
    print "Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size)
    
    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
Example #8
0
def import_xml(filename, copy_status=MASTER):
    """
    Import a single resource from the XML file with the given name.

    filename: path of the metadata XML file to import
    copy_status: copy status (default MASTER) forwarded to the model import

    Returns the result tuple of resourceInfoType_model.import_from_string.
    """
    # use a context manager so the file handle is released even when
    # reading raises
    with open(filename) as _xml:
        _xml_string = _xml.read()
    return resourceInfoType_model.import_from_string(
        _xml_string, copy_status=copy_status)
Example #9
0
def import_xml(filename):
    """Import a single resource from the XML file with the given name.

    Returns the result tuple of resourceInfoType_model.import_from_string.
    """
    handle = open(filename)
    contents = handle.read()
    handle.close()
    return resourceInfoType_model.import_from_string(contents)