except ImportError:
    # The settings module could not be imported: explain the situation on
    # stderr and terminate with exit status 1.
    sys.stderr.write("Error: Can't find the file 'settings.py' in the " \
        "directory containing %r. It appears you've customized things.\n" \
        "You'll have to run django-admin.py, passing it your settings " \
        "module.\n(If the file settings.py does indeed exist, it's causing" \
        " an ImportError somehow.)\n" % __file__)
    sys.exit(1)

# Script entry point: prepares the environment, verifies the setup, updates
# the storage digests and finally rebuilds the index.
if __name__ == "__main__":
    os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
    # Append the parent of the current working directory to the module
    # search path.
    PROJECT_HOME = os.path.normpath(os.getcwd() + "/..")
    sys.path.append(PROJECT_HOME)

    # Check that SOLR is running, or else all resources will stay at status INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    # NOTE(review): presumably read elsewhere in the project to skip index
    # updates while objects are being saved -- confirm against the reader of
    # this environment variable.
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    # NOTE(review): update_digests() is defined in metashare.storage.models;
    # its exact effect is not visible here.
    from metashare.storage.models import update_digests
    update_digests()

    # Run the 'rebuild_index' management command without interactive prompts.
    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
def import_resources(import_folder):
    """
    Imports resources from the given folder.

    Every sub-directory of ``<import_folder>/<STORAGE_FOLDER>`` is handled as
    one resource. For each such directory the function

    1. deserializes the storage object from the ``STORAGE`` file (this object
       is never saved, it only serves as attribute source),
    2. deserializes the resource object from the ``RESOURCE`` file,
    3. imports the resource from the ``METADATA`` file with status ``MASTER``,
    4. passes all three to ``_update_resource()``, and
    5. copies at most one binary archive (``ARCHIVE_TPL`` formatted with one of
       the ``ALLOWED_ARCHIVE_EXTENSIONS``) into the storage folder of the
       imported resource.

    A failure in one directory does not abort the run: the directory name and
    the exception are collected and reported at the end. Afterwards the
    ``OBJECT_XML_CACHE`` is cleared and the ``rebuild_index`` management
    command is run.

    :param import_folder: path of the folder containing ``STORAGE_FOLDER``.
    :returns: ``None``; progress and a summary are printed to stdout.
    :raises Exception: whatever ``verify_at_startup()`` raises; this is
        deliberately not caught.
    """
    # Check that SOLR is running, or else all resources will stay at status
    # INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        try:
            print("importing from folder: '{0}'".format(folder_name))

            # import storage object; reset per folder so that an empty file
            # can never silently reuse the object of the previous folder
            storage_obj = None
            so_filename = os.path.join(folder_path, STORAGE)
            with open(so_filename, "rb") as so_in:
                for obj in serializers.deserialize("xml", so_in):
                    print("importing storage object")
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
            if storage_obj is None:
                raise Exception(
                    "no storage object found in '{0}'".format(so_filename))

            # import resource object (same per-folder reset as above)
            res_obj = None
            ro_filename = os.path.join(folder_path, RESOURCE)
            with open(ro_filename, "rb") as ro_in:
                for obj in serializers.deserialize("xml", ro_in):
                    print("importing resource object")
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany
                    # attributes in m2m_data
            if res_obj is None:
                raise Exception(
                    "no resource object found in '{0}'".format(ro_filename))

            # import resource from metadata.xml; the context manager makes
            # sure the file handle is released again
            res_filename = os.path.join(folder_path, METADATA)
            with open(res_filename, 'rb') as metadata_in:
                xml_string = metadata_in.read()
            result = resourceInfoType_model.import_from_string(
                xml_string, copy_status=MASTER)
            if not result[0]:
                msg = u''
                if len(result) > 2:
                    msg = u'{}'.format(result[2])
                raise Exception(msg)
            res = result[0]

            # update imported resource with imported resource object
            # and storage object
            _update_resource(res, res_obj, storage_obj)

            # copy possible binaries archives
            for _ext in ALLOWED_ARCHIVE_EXTENSIONS:
                archive_name = ARCHIVE_TPL.format(_ext)
                archive_filename = os.path.join(folder_path, archive_name)
                if os.path.isfile(archive_filename):
                    print("copying archive")
                    res_storage_path = '{0}/{1}/'.format(
                        settings.STORAGE_PATH,
                        res.storage_object.identifier)
                    shutil.copy(
                        archive_filename,
                        os.path.join(res_storage_path, archive_name))
                    # there can be at most one binary
                    break

            imported_resources.append(res)

        except Exception as problem:
            # Best effort: remember the failure and go on with the next
            # folder instead of aborting the whole import.
            from django import db
            if isinstance(problem, db.utils.DatabaseError):
                # reset database connection (required for PostgreSQL)
                db.close_connection()
            erroneous_descriptors.append((folder_name, problem))

    print("Done.  Successfully imported {0} resources into the database, " \
        "errors occurred in {1} cases.".format(
            len(imported_resources), len(erroneous_descriptors)))
    if erroneous_descriptors:
        print("The following resources could not be imported:")
        for descriptor, exception in erroneous_descriptors:
            print("\t{}: {}".format(descriptor, exception))

    # Be nice and cleanup cache...
    _cache_size = sum(len(x) for x in OBJECT_XML_CACHE.values())
    OBJECT_XML_CACHE.clear()
    print("Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size))

    # Indexing was disabled for the import itself, so rebuild the index once
    # at the end, without interactive prompts.
    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)
#!/usr/bin/env python
import os
import sys

import django

# Command line entry point: selects the default settings module, optionally
# verifies the setup for server commands and then hands over to Django.
if __name__ == "__main__":
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "metashare.settings")

    # MS, 21.03.2012: Add a fail-early verification "hook"
    fail_early_commands = ('runserver', 'runfcgi')
    # The sub-command, if any, is the first argument after the script name.
    requested = sys.argv[1] if len(sys.argv) > 1 else None
    if requested in fail_early_commands:
        django.setup()
        from metashare.repository import verify_at_startup
        # may raise Exception, which we don't want to catch.
        verify_at_startup()

    from django.core.management import execute_from_command_line
    execute_from_command_line(sys.argv)
def import_resources(import_folder):
    """
    Imports resources from the given folder.

    Every sub-directory of ``<import_folder>/<STORAGE_FOLDER>`` is handled as
    one resource. For each such directory the function

    1. deserializes the storage object from the ``STORAGE`` file (this object
       is never saved, it only serves as attribute source),
    2. deserializes the resource object from the ``RESOURCE`` file,
    3. imports the resource from the ``METADATA`` file with status ``MASTER``,
    4. passes all three to ``_update_resource()``, and
    5. copies at most one binary archive (``ARCHIVE_TPL`` formatted with one of
       the ``ALLOWED_ARCHIVE_EXTENSIONS``) into the storage folder of the
       imported resource.

    A failure in one directory does not abort the run: the directory name and
    the exception are collected and reported at the end. Afterwards the
    ``OBJECT_XML_CACHE`` is cleared and the ``rebuild_index`` management
    command is run.

    :param import_folder: path of the folder containing ``STORAGE_FOLDER``.
    :returns: ``None``; progress and a summary are printed to stdout.
    :raises Exception: whatever ``verify_at_startup()`` raises; this is
        deliberately not caught.
    """
    # Check that SOLR is running, or else all resources will stay at status
    # INTERNAL:
    from metashare.repository import verify_at_startup
    verify_at_startup() # may raise Exception, which we don't want to catch.

    # Disable verbose debug output for the import process...
    settings.DEBUG = False
    os.environ['DISABLE_INDEXING_DURING_IMPORT'] = 'True'

    from metashare.repository.supermodel import OBJECT_XML_CACHE

    # Clean cache before starting the import process.
    OBJECT_XML_CACHE.clear()

    # iterate over storage folder content
    from django.core import serializers
    from metashare.storage.models import MASTER, ALLOWED_ARCHIVE_EXTENSIONS
    from metashare.repository.models import resourceInfoType_model

    imported_resources = []
    erroneous_descriptors = []

    storage_path = os.path.join(import_folder, STORAGE_FOLDER)
    for folder_name in os.listdir(storage_path):
        folder_path = "{}/{}/".format(storage_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        try:
            print("importing from folder: '{0}'".format(folder_name))

            # import storage object; reset per folder so that an empty file
            # can never silently reuse the object of the previous folder
            storage_obj = None
            so_filename = os.path.join(folder_path, STORAGE)
            with open(so_filename, "rb") as so_in:
                for obj in serializers.deserialize("xml", so_in):
                    print("importing storage object")
                    # storage.xml only contains a single storage object
                    storage_obj = obj.object
                    # this storage object is NOT saved!
                    # we only copy the relevant attributes from this storage
                    # object to the one at the resource!
            if storage_obj is None:
                raise Exception(
                    "no storage object found in '{0}'".format(so_filename))

            # import resource object (same per-folder reset as above)
            res_obj = None
            ro_filename = os.path.join(folder_path, RESOURCE)
            with open(ro_filename, "rb") as ro_in:
                for obj in serializers.deserialize("xml", ro_in):
                    print("importing resource object")
                    # resource.xml only contains a single resource object
                    res_obj = obj
                    # the deserialized object contains the ManyToMany
                    # attributes in m2m_data
            if res_obj is None:
                raise Exception(
                    "no resource object found in '{0}'".format(ro_filename))

            # import resource from metadata.xml; the context manager makes
            # sure the file handle is released again
            res_filename = os.path.join(folder_path, METADATA)
            with open(res_filename, 'rb') as metadata_in:
                xml_string = metadata_in.read()
            result = resourceInfoType_model.import_from_string(
                xml_string, copy_status=MASTER)
            if not result[0]:
                msg = u''
                if len(result) > 2:
                    msg = u'{}'.format(result[2])
                raise Exception(msg)
            res = result[0]

            # update imported resource with imported resource object
            # and storage object
            _update_resource(res, res_obj, storage_obj)

            # copy possible binaries archives
            for _ext in ALLOWED_ARCHIVE_EXTENSIONS:
                archive_name = ARCHIVE_TPL.format(_ext)
                archive_filename = os.path.join(folder_path, archive_name)
                if os.path.isfile(archive_filename):
                    print("copying archive")
                    res_storage_path = '{0}/{1}/'.format(
                        settings.STORAGE_PATH,
                        res.storage_object.identifier)
                    shutil.copy(
                        archive_filename,
                        os.path.join(res_storage_path, archive_name))
                    # there can be at most one binary
                    break

            imported_resources.append(res)

        except Exception as problem:
            # Best effort: remember the failure and go on with the next
            # folder instead of aborting the whole import.
            from django import db
            if isinstance(problem, db.utils.DatabaseError):
                # reset database connection (required for PostgreSQL)
                db.close_connection()
            erroneous_descriptors.append((folder_name, problem))

    print("Done.  Successfully imported {0} resources into the database, " \
        "errors occurred in {1} cases.".format(
            len(imported_resources), len(erroneous_descriptors)))
    if erroneous_descriptors:
        print("The following resources could not be imported:")
        for descriptor, exception in erroneous_descriptors:
            print("\t{}: {}".format(descriptor, exception))

    # Be nice and cleanup cache...
    _cache_size = sum(len(x) for x in OBJECT_XML_CACHE.values())
    OBJECT_XML_CACHE.clear()
    print("Cleared OBJECT_XML_CACHE ({} bytes)".format(_cache_size))

    # Indexing was disabled for the import itself, so rebuild the index once
    # at the end, without interactive prompts.
    from django.core.management import call_command
    call_command('rebuild_index', interactive=False)