    def populate_data_from_storage_service(self, es_client, pipeline_uuid):
        """Populate indices and/or database from Storage Service.

        :param es_client: Elasticsearch client.
        :param pipeline_uuid: UUID of origin pipeline for transfers to
        reindex.

        :returns: None
        """
        transfers = storageService.get_file_info(package_type="transfer")
        filtered_transfers = storageService.filter_packages(
            transfers, pipeline_uuid=pipeline_uuid)
        processed = 0
        for transfer in filtered_transfers:
            transfer_uuid = transfer["uuid"]
            temp_backlog_dir = tempfile.mkdtemp()
            try:
                local_package = storageService.download_package(
                    transfer_uuid, temp_backlog_dir)
            except storageService.Error:
                self.error(
                    "Transfer {} not indexed. Unable to download from Storage Service."
                    .format(transfer_uuid))
                continue
            # Transfers are downloaded as .tar files, so we extract files
            # before indexing.
            try:
                extract_package(local_package, temp_backlog_dir)
            except CalledProcessError as err:
                self.error(
                    "Transfer {0} not indexed. File extraction from tar failed: {1}."
                    .format(transfer_uuid, err))
                continue
            local_package_without_extension = am.package_name_from_path(
                local_package)
            transfer_indexed = False
            for entry in scandir(temp_backlog_dir):
                if (entry.is_dir()
                        and entry.name == local_package_without_extension):
                    transfer_path = entry.path
                    self.info(
                        "Importing transfer {} from temporarily downloaded copy."
                        .format(transfer_uuid))
                    _import_self_describing_transfer(
                        self,
                        es_client,
                        self.stdout,
                        Path(transfer_path),
                        transfer_uuid,
                        transfer["size"],
                    )
                    transfer_indexed = True
            shutil.rmtree(temp_backlog_dir)
            if transfer_indexed:
                processed += 1
            else:
                self.error(
                    "Transfer {} not indexed. Unable to find files extracted from tar."
                    .format(transfer_uuid))
        self.success("{} transfers indexed!".format(processed))
Example 2
def dspace_handle_to_archivesspace(job, sip_uuid):
    """Fetch the DSpace handle from the Storage Service and send to ArchivesSpace."""
    # Get association to ArchivesSpace if it exists
    try:
        digital_object = models.ArchivesSpaceDigitalObject.objects.get(sip_id=sip_uuid)
    except models.ArchivesSpaceDigitalObject.DoesNotExist:
        job.pyprint('SIP', sip_uuid, 'not associated with an ArchivesSpace component')
        return NO_ACTION
    job.pyprint('Digital Object', digital_object.remoteid, 'for SIP', digital_object.sip_id, 'found')
    logger.info('Digital Object %s for SIP %s found', digital_object.remoteid, digital_object.sip_id)

    # Get dspace handle from SS
    file_info = storage_service.get_file_info(uuid=sip_uuid)[0]
    try:
        handle = file_info['misc_attributes']['handle']
    except KeyError:
        job.pyprint('AIP has no DSpace handle stored')
        return NO_ACTION
    job.pyprint('DSpace handle:', handle)
    logger.info('DSpace handle: %s', handle)

    # POST Dspace handle to ArchivesSpace
    # Get ArchivesSpace config
    config = models.DashboardSetting.objects.get_dict('upload-archivesspace_v0.0')
    archivesspace_url = config["base_url"]

    # Log in
    url = archivesspace_url + '/users/' + config['user'] + '/login'
    params = {'password': config['passwd']}
    logger.debug('Log in to ArchivesSpace URL: %s', url)
    response = requests.post(url, params=params, timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT)
    logger.debug('Response: %s %s', response, response.content)
    session_id = response.json()['session']
    headers = {'X-ArchivesSpace-Session': session_id}

    # Get Digital Object from ArchivesSpace
    url = archivesspace_url + digital_object.remoteid
    logger.debug('Get Digital Object info URL: %s', url)
    response = requests.get(url, headers=headers, timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT)
    logger.debug('Response: %s %s', response, response.content)
    body = response.json()

    # Update
    url = archivesspace_url + digital_object.remoteid
    file_version = {
        "file_uri": handle,
        "use_statement": config['use_statement'],
        "xlink_show_attribute": config['xlink_show'],
        "xlink_actuate_attribute": config['xlink_actuate'],
    }
    body['file_versions'].append(file_version)
    logger.debug('Modified Digital Object: %s', body)
    response = requests.post(url, headers=headers, json=body, timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT)
    job.pyprint('Update response:', response, response.content)
    logger.debug('Response: %s %s', response, response.content)
    if response.status_code != 200:
        job.pyprint('Error updating', digital_object.remoteid)
        return ERROR
    return COMPLETED
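The login step above recurs in several of these examples; a small helper one might extract, assuming only the standard ArchivesSpace /users/&lt;user&gt;/login endpoint already used here:

def archivesspace_session_headers(archivesspace_url, user, passwd, timeout):
    """Hypothetical helper: log in to ArchivesSpace and return the
    session header dict used by the requests above."""
    url = archivesspace_url + '/users/' + user + '/login'
    response = requests.post(url, params={'password': passwd}, timeout=timeout)
    response.raise_for_status()  # fail fast on a bad login
    return {'X-ArchivesSpace-Session': response.json()['session']}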
Example 3
def dspace_handle_to_archivesspace(sip_uuid):
    """Fetch the DSpace handle from the Storage Service and send to ArchivesSpace."""
    # Get association to ArchivesSpace if it exists
    try:
        digital_object = models.ArchivesSpaceDigitalObject.objects.get(sip_id=sip_uuid)
    except models.ArchivesSpaceDigitalObject.DoesNotExist:
        print('SIP', sip_uuid, 'not associated with an ArchivesSpace component')
        return NO_ACTION
    print('Digital Object', digital_object.remoteid, 'for SIP', digital_object.sip_id, 'found')
    logger.info('Digital Object %s for SIP %s found', digital_object.remoteid, digital_object.sip_id)

    # Get dspace handle from SS
    file_info = storage_service.get_file_info(uuid=sip_uuid)[0]
    try:
        handle = file_info['misc_attributes']['handle']
    except KeyError:
        print('AIP has no DSpace handle stored')
        return NO_ACTION
    print('DSpace handle:', handle)
    logger.info('DSpace handle: %s', handle)

    # POST Dspace handle to ArchivesSpace
    # Get ArchivesSpace config
    config = admin_models.ArchivesSpaceConfig.objects.all()[0]
    archivesspace_url = 'http://' + config.host + ':' + str(config.port)

    # Log in
    url = archivesspace_url + '/users/' + config.user + '/login'
    params = {'password': config.passwd}
    logger.debug('Log in to ArchivesSpace URL: %s', url)
    response = requests.post(url, params=params)
    logger.debug('Response: %s %s', response, response.content)
    session_id = response.json()['session']
    headers = {'X-ArchivesSpace-Session': session_id}

    # Get Digital Object from ArchivesSpace
    url = archivesspace_url + digital_object.remoteid
    logger.debug('Get Digital Object info URL: %s', url)
    response = requests.get(url, headers=headers)
    logger.debug('Response: %s %s', response, response.content)
    body = response.json()

    # Update
    url = archivesspace_url + digital_object.remoteid
    file_version = {
        "file_uri": handle,
        "use_statement": config.use_statement,
        "xlink_show_attribute": config.xlink_show,
        "xlink_actuate_attribute": config.xlink_actuate,
    }
    body['file_versions'].append(file_version)
    logger.debug('Modified Digital Object: %s', body)
    response = requests.post(url, headers=headers, json=body)
    print('Update response:', response, response.content)
    logger.debug('Response: %s %s', response, response.content)
    if response.status_code != 200:
        print('Error updating', digital_object.remoteid)
        return ERROR
    return COMPLETED
Example 4
def index_aip(job):
    """Write AIP information to ElasticSearch. """
    sip_uuid = job.args[1]  # %SIPUUID%
    sip_name = job.args[2]  # %SIPName%
    sip_staging_path = job.args[3]  # %SIPDirectory%
    sip_type = job.args[4]  # %SIPType%
    if "aips" not in mcpclient_settings.SEARCH_ENABLED:
        logger.info("Skipping indexing: AIPs indexing is currently disabled.")
        return 0
    elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
    client = elasticSearchFunctions.get_client()
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    job.pyprint("AIP info:", aip_info)
    aip_info = aip_info[0]
    mets_staging_path = os.path.join(sip_staging_path,
                                     "METS.{}.xml".format(sip_uuid))
    identifiers = get_identifiers(job, sip_staging_path)
    # If this is an AIC, find the number of AIPs stored in it and index that
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            pass
    # Delete ES index before creating new one if reingesting
    if "REIN" in sip_type:
        job.pyprint(
            "Deleting outdated entry for AIP and AIP files with UUID",
            sip_uuid,
            "from archival storage",
        )
        elasticSearchFunctions.delete_aip(client, sip_uuid)
        elasticSearchFunctions.delete_aip_files(client, sip_uuid)
    job.pyprint("Indexing AIP and AIP files")
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    ret = elasticSearchFunctions.index_aip_and_files(
        client=client,
        uuid=sip_uuid,
        aip_stored_path=aip_info["current_full_path"],
        mets_staging_path=mets_staging_path,
        name=sip_name,
        aip_size=aip_info["size"],
        aips_in_aic=aips_in_aic,
        identifiers=identifiers,
        encrypted=aip_info["encrypted"],
        printfn=job.pyprint,
    )
    if ret == 1:
        job.pyprint("Error indexing AIP and AIP files", file=sys.stderr)
    return ret
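For local testing, the Job object can be stubbed; a minimal sketch (the _StubJob class is hypothetical, modelling only the args and pyprint members that index_aip uses):

class _StubJob:
    """Hypothetical stand-in for the MCPClient Job object."""

    def __init__(self, args):
        self.args = args

    def pyprint(self, *objects, **kwargs):
        print(*objects, **kwargs)


# job.args[0] is conventionally the script name, so the SIP values
# start at index 1, as read at the top of index_aip:
# index_aip(_StubJob(["index_aip", sip_uuid, sip_name, sip_dir, "SIP"]))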
Example 5
def aips_pending_deletion():
    aip_uuids = []
    try:
        aips = storage_service.get_file_info(status='DEL_REQ')
    except Exception as e:
        # TODO this should be messages.warning, but we need 'request' here
        logger.warning("Error retrieving AIPs pending deletion: is the storage server running?  Error: {}".format(e))
    else:
        for aip in aips:
            aip_uuids.append(aip['uuid'])
    return aip_uuids
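The TODO above notes that the warning should reach the user through Django's messages framework, which needs the request object; a hedged sketch of that variant:

from django.contrib import messages


def aips_pending_deletion_with_warning(request):
    """Hypothetical variant of the function above that surfaces the
    error through the messages framework instead of only logging it."""
    aip_uuids = []
    try:
        aips = storage_service.get_file_info(status='DEL_REQ')
    except Exception as e:
        messages.warning(
            request,
            "Error retrieving AIPs pending deletion: is the storage "
            "server running? Error: {}".format(e))
    else:
        aip_uuids = [aip['uuid'] for aip in aips]
    return aip_uuids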
Example 6
def check_and_remove_deleted_transfers(es_client):
    """
    Check the storage service to see if transfers marked in ES as 'pending deletion' have been deleted yet. If so,
    remove the transfer and its files from ES. This is a bit of a kludge (that we do elsewhere e.g. in the storage tab),
    but it appears necessary as the storage service doesn't talk directly to ES.

    :return: None
    """
    query = {
        'query': {
            'bool': {
                'must': {
                    'match': {
                        'pending_deletion': True
                    }
                }
            }
        }
    }

    deletion_pending_results = es_client.search(body=query,
                                                index='transfers',
                                                doc_type='transfer',
                                                fields='uuid,status')

    for hit in deletion_pending_results['hits']['hits']:
        transfer_uuid = hit['fields']['uuid'][0]

        api_results = storage_service.get_file_info(uuid=transfer_uuid)
        try:
            status = api_results[0]['status']
        except IndexError:
            logger.info('Transfer not found in storage service: {}'.format(
                transfer_uuid))
            continue

        if status == 'DELETED':
            elasticSearchFunctions.remove_backlog_transfer_files(
                es_client, transfer_uuid)
            elasticSearchFunctions.remove_backlog_transfer(
                es_client, transfer_uuid)
Example 7
def check_and_remove_deleted_transfers(es_client):
    """
    Check the storage service to see if transfers marked in ES as 'pending deletion' have been deleted yet. If so,
    remove the transfer and its files from ES. This is a bit of a kludge (that we do elsewhere e.g. in the storage tab),
    but it appears necessary as the storage service doesn't talk directly to ES.

    :return: None
    """
    query = {
        "query": {
            "bool": {
                "must": {
                    "match": {
                        "pending_deletion": True
                    }
                }
            }
        }
    }

    deletion_pending_results = es_client.search(body=query,
                                                index="transfers",
                                                _source="uuid,status")

    for hit in deletion_pending_results["hits"]["hits"]:
        transfer_uuid = hit["_source"]["uuid"]

        api_results = storage_service.get_file_info(uuid=transfer_uuid)
        try:
            status = api_results[0]["status"]
        except IndexError:
            logger.info("Transfer not found in storage service: {}".format(
                transfer_uuid))
            continue

        if status == "DELETED":
            elasticSearchFunctions.remove_backlog_transfer_files(
                es_client, transfer_uuid)
            elasticSearchFunctions.remove_backlog_transfer(
                es_client, transfer_uuid)
Example 8
    def handle(self, *args, **options):
        # Ignore elasticsearch-py logging events unless they're errors.
        logging.getLogger("elasticsearch").setLevel(logging.ERROR)
        logging.getLogger("archivematica.common").setLevel(logging.ERROR)

        # Create temporary directory for downloaded METS files.
        temp_dir = tempfile.mkdtemp()

        pipeline_uuid = options["pipeline"]
        delete_all = options["delete_all"]

        delete_before_reindexing = False
        if options["delete"]:
            delete_before_reindexing = True

        if options["uuid"]:
            aips_to_index = storageService.get_file_info(uuid=options["uuid"])
            # If we're indexing only one AIP, don't delete the indices.
            delete_all = False
        else:
            # For bulk operations, index all AIPs and AICs associated
            # with the pipeline that are not deleted or replicas.
            packages = storageService.get_file_info()
            aips_to_index = storageService.filter_packages(
                packages,
                package_types=PACKAGE_TYPES_TO_INDEX,
                pipeline_uuid=pipeline_uuid,
                filter_replicas=True,
            )
        aips_to_index_count = len(aips_to_index)

        # If there's nothing to index, log error and quit.
        if not aips_to_index_count:
            self.error("No AIPs found to index. Quitting.")
            sys.exit(1)

        # Setup es_client and delete indices if required.
        es_client = setup_es_for_aip_reindexing(self, delete_all)
        self.info("Rebuilding 'aips' and 'aipfiles' indices")

        # Index packages.
        packages_not_indexed = []
        aip_indexed_count = 0
        for aip in aips_to_index:
            is_aic = False
            if aip["package_type"] == "AIC":
                is_aic = True
            index_success = self.process_package(
                es_client, aip, temp_dir, delete_before_reindexing, is_aic=is_aic
            )
            if index_success:
                aip_indexed_count += 1
            else:
                packages_not_indexed.append(aip["uuid"])

        # Clean up and report on packages indexed.
        self.info("Cleaning up")
        shutil.rmtree(temp_dir)

        if packages_not_indexed:
            self.error(
                "Indexing complete. Indexed {count} of {total} AIPs/AICs. Packages not indexed: {uuids}.".format(
                    count=aip_indexed_count,
                    total=aips_to_index_count,
                    uuids=", ".join(packages_not_indexed),
                )
            )
        else:
            pluralized_aips_aics_term = (
                "AIP/AIC" if aip_indexed_count == 1 else "AIPs/AICs"
            )
            self.success(
                "Indexing complete. Successfully indexed {count} {term}.".format(
                    count=aip_indexed_count, term=pluralized_aips_aics_term
                )
            )
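handle() reads pipeline, uuid, delete and delete_all from options; a sketch of the add_arguments() definition that would supply them (the flag names and help strings are assumptions, not necessarily the command's actual interface):

    def add_arguments(self, parser):
        """Hypothetical sketch of the options consumed by handle() above."""
        parser.add_argument("--pipeline", help="UUID of the origin pipeline")
        parser.add_argument("-u", "--uuid", default="",
                            help="Reindex a single AIP by UUID")
        parser.add_argument("--delete", action="store_true",
                            help="Delete each AIP's documents before reindexing")
        parser.add_argument("--delete-all", action="store_true",
                            help="Delete the 'aips' and 'aipfiles' indices first")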
Example 9
def list_display(request):

    if 'aips' not in settings.SEARCH_ENABLED:
        return render(request, 'archival_storage/list.html')
    current_page_number = int(request.GET.get('page', 1))
    logger.debug('Current page: %s', current_page_number)

    # get count of AIP files
    es_client = elasticSearchFunctions.get_client()
    aip_indexed_file_count = aip_file_count(es_client)

    # get AIPs
    order_by = request.GET.get('order_by', 'name_unanalyzed')
    sort_by = request.GET.get('sort_by', 'up')

    if sort_by == 'down':
        sort_direction = 'desc'
    else:
        sort_direction = 'asc'

    sort_specification = order_by + ':' + sort_direction
    sort_params = 'order_by=' + order_by + '&sort_by=' + sort_by

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [
        {'match': {'status': 'DEL_REQ'}},
        {'match': {'status': 'DELETED'}},
    ]
    query = {
        "query": {
            "bool": {
                "should": should_haves
            }
        }
    }
    deleted_aip_results = es_client.search(
        body=query,
        index='aips',
        doc_type='aip',
        fields='uuid,status'
    )
    for deleted_aip in deleted_aip_results['hits']['hits']:
        aips_deleted_or_pending_deletion.append(deleted_aip['fields']['uuid'][0])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have been cleaned up
        """
        start = (page - 1) * page_size
        results = es_client.search(
            index='aips',
            doc_type='aip',
            body=elasticSearchFunctions.MATCH_ALL_QUERY,
            fields='origin,uuid,filePath,created,name,size,encrypted',
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        # normalize results - each of the fields contains a single value,
        # but is returned from the ES API as a single-length array
        # e.g. {"fields": {"uuid": ["abcd"], "name": ["aip"] ...}}
        return [elasticSearchFunctions.normalize_results_dict(d) for d in results['hits']['hits']]

    items_per_page = 10
    count = es_client.count(index='aips', doc_type='aip', body=elasticSearchFunctions.MATCH_ALL_QUERY)['count']
    results = LazyPagedSequence(es_pager, page_size=items_per_page, length=count)

    # Paginate
    page = helpers.pager(
        results,
        items_per_page,
        current_page_number
    )

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip['uuid'] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip['uuid'])
            try:
                aip_status = api_results[0]['status']
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted from the
            # storage server
            # TODO: handle this asynchronously
            if aip_status == 'DELETED':
                elasticSearchFunctions.delete_aip(es_client, aip['uuid'])
                elasticSearchFunctions.delete_aip_files(es_client, aip['uuid'])
            elif aip_status != 'DEL_REQ':
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.mark_aip_stored(es_client, aip['uuid'])
        else:
            aip_status = 'UPLOADED'

        # Tweak AIP presentation and add to display array
        if aip_status != 'DELETED':
            aip['status'] = AIP_STATUS_DESCRIPTIONS[aip_status]

            try:
                size = '{0:.2f} MB'.format(float(aip['size']))
            except (TypeError, ValueError):
                size = 'Removed'

            aip['size'] = size

            aip['href'] = aip['filePath'].replace(AIPSTOREPATH + '/', "AIPsStore/")
            aip['date'] = aip['created']

            aips.append(aip)

    total_size = total_size_of_aips(es_client)
    # Find out which AIPs are encrypted

    return render(request, 'archival_storage/list.html',
                  {
                      'total_size': total_size,
                      'aip_indexed_file_count': aip_indexed_file_count,
                      'aips': aips,
                      'page': page,
                      'search_params': sort_params,
                  }
                  )
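LazyPagedSequence is used above but not defined in this listing; a minimal sketch of the idea, assuming Django's paginator requests one page-aligned slice at a time, as es_pager expects:

class LazyPagedSequence(object):
    """Hypothetical sketch: a sequence that fetches pages on demand."""

    def __init__(self, page_fetcher, page_size, length):
        self.page_fetcher = page_fetcher
        self.page_size = page_size
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, slice_):
        # Assumes slices are page-aligned, as produced by the paginator.
        page = slice_.start // self.page_size + 1
        return self.page_fetcher(page, self.page_size)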
Example 10
def store_aip(aip_destination_uri, aip_path, sip_uuid, sip_name, sip_type):
    """ Stores an AIP with the storage service.

    aip_destination_uri = storage service destination URI, should be of purpose
        AIP Store (AS)
    aip_path = Full absolute path to the AIP's current location on the local
        filesystem
    sip_uuid = UUID of the SIP, which will become the UUID of the AIP
    sip_name = SIP name.  Not used directly, but part of the AIP name
    sip_type = SIP type, e.g. SIP or AIC; determines the package type and
        whether this is a reingest ('REIN' in sip_type)

    Example inputs:
    storeAIP.py
        "/api/v1/location/9c2b5bb7-abd6-477b-88e0-57107219dace/"
        "/var/archivematica/sharedDirectory/currentlyProcessing/ep6-0737708e-9b99-471a-b331-283e2244164f/ep6-0737708e-9b99-471a-b331-283e2244164f.7z"
        "0737708e-9b99-471a-b331-283e2244164f"
        "ep6"
    """

    # FIXME Assume current Location is the one set up by default until location
    # is passed in properly, or use Agent to make sure is correct CP
    current_location = storage_service.get_location(purpose="CP")[0]

    # If ``aip_path`` does not exist, this may be a DIP that was not uploaded.
    # In that case, it will be in the uploadDIP/ directory instead of the
    # uploadedDIPs/ directory.
    if not os.path.exists(aip_path):
        aip_path = get_upload_dip_path(aip_path)

    # Make aip_path relative to the Location
    shared_path = os.path.join(current_location['path'],
                               '')  # Ensure ends with /
    relative_aip_path = aip_path.replace(shared_path, '')

    # Get the package type: AIC or AIP
    if 'SIP' in sip_type or 'AIP' in sip_type:  # Also matches AIP-REIN
        package_type = "AIP"
    elif 'AIC' in sip_type:  # Also matches AIC-REIN
        package_type = 'AIC'
    elif 'DIP' in sip_type:
        package_type = 'DIP'

    # Uncompressed directory AIPs must be terminated in a /,
    # otherwise the storage service will place the directory
    # inside another directory of the same name.
    current_path = os.path.basename(aip_path)
    if os.path.isdir(aip_path) and not aip_path.endswith('/'):
        relative_aip_path = relative_aip_path + '/'

    # DIPs cannot share the AIP UUID, as the storage service depends on
    # having a unique UUID; assign a new one before uploading.
    # TODO allow mapping the AIP UUID to the DIP UUID for retrieval.
    related_package_uuid = None
    if sip_type == 'DIP':
        uuid = str(uuid4())
        print('Checking if DIP {} parent AIP has been created...'.format(uuid))

        # Set related package UUID, so a relationship to the parent AIP can
        # be created if the AIP has been stored. If the AIP hasn't yet been
        # stored, take note of the DIP's UUID so the relationship can later
        # be created when the AIP is stored.
        try:
            storage_service.get_file_info(uuid=sip_uuid)[0]  # Check existence
            related_package_uuid = sip_uuid
            print('Parent AIP exists so relationship can be created.')
        except IndexError:
            UnitVariable.objects.create(unittype='SIP',
                                        unituuid=sip_uuid,
                                        variable='relatedPackage',
                                        variablevalue=uuid)
            print(
                'Noting DIP UUID {} related to AIP so relationship can be created when AIP is stored.'
                .format(uuid))
    else:
        uuid = sip_uuid
        related_package = get_object_or_None(UnitVariable,
                                             unituuid=sip_uuid,
                                             variable='relatedPackage')
        related_package_uuid = related_package.variablevalue if related_package is not None else None

    # If AIP is a directory, calculate size recursively
    if os.path.isdir(aip_path):
        size = 0
        for dirpath, _, filenames in os.walk(aip_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                size += os.path.getsize(file_path)
    else:
        size = os.path.getsize(aip_path)

    # Get the AIP subtype from any DC type attribute supplied by the user for
    # the AIP. If found, this will replace 'Archival Information Package' in
    # ``<mets:div TYPE='Archival Information Package'>`` in the pointer file.
    sip_metadata_uuid = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'
    try:
        dc = DublinCore.objects.get(metadataappliestotype_id=sip_metadata_uuid,
                                    metadataappliestoidentifier=uuid)
    except DublinCore.DoesNotExist:
        aip_subtype = 'Archival Information Package'
    else:
        aip_subtype = dc.type

    # Store the AIP
    (new_file, error_msg) = storage_service.create_file(
        uuid=uuid,
        origin_location=current_location['resource_uri'],
        origin_path=relative_aip_path,
        current_location=aip_destination_uri,
        current_path=current_path,
        package_type=package_type,
        aip_subtype=aip_subtype,
        size=size,
        update='REIN' in sip_type,
        related_package_uuid=related_package_uuid,
        events=get_events_from_db(uuid),
        agents=get_agents_from_db(uuid))

    if new_file is not None and new_file.get('status', '') != "FAIL":
        message = "Storage service created {}: {}".format(sip_type, new_file)
        LOGGER.info(message)
        print(message)
        sys.exit(0)
    else:
        print("{} creation failed.  See Storage Service logs for more details".
              format(sip_type),
              file=sys.stderr)
        print(error_msg or "Package status: Failed", file=sys.stderr)
        LOGGER.warning(
            "{} could not be created: {}.  See logs for more details.".format(
                sip_type, error_msg))
        sys.exit(1)
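For reference, a call mirroring the docstring's sample inputs (the trailing "SIP" sip_type is an assumption for that example):

store_aip(
    "/api/v1/location/9c2b5bb7-abd6-477b-88e0-57107219dace/",
    "/var/archivematica/sharedDirectory/currentlyProcessing/"
    "ep6-0737708e-9b99-471a-b331-283e2244164f/"
    "ep6-0737708e-9b99-471a-b331-283e2244164f.7z",
    "0737708e-9b99-471a-b331-283e2244164f",
    "ep6",
    "SIP",
)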
Example 11
def index_aip():
    """ Write AIP information to ElasticSearch. """
    sip_uuid = sys.argv[1]  # %SIPUUID%
    sip_name = sys.argv[2]  # %SIPName%
    sip_path = sys.argv[3]  # %SIPDirectory%
    sip_type = sys.argv[4]  # %SIPType%

    # Check if ElasticSearch is enabled
    client_config_path = '/etc/archivematica/MCPClient/clientConfig.conf'
    config = ConfigParser.SafeConfigParser()
    config.read(client_config_path)
    elastic_search_disabled = False
    try:
        elastic_search_disabled = config.getboolean(
            'MCPClient', "disableElasticsearchIndexing")
    except ConfigParser.NoOptionError:
        pass
    if elastic_search_disabled:
        print('Skipping indexing: indexing is currently disabled in', client_config_path)
        return 0

    print('SIP UUID:', sip_uuid)
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    print('AIP info:', aip_info)
    aip_info = aip_info[0]

    mets_name = 'METS.{}.xml'.format(sip_uuid)
    mets_path = os.path.join(sip_path, mets_name)

    mods_paths = list_mods(sip_path)
    identifiers = []
    for mods in mods_paths:
        identifiers.extend(extract_identifiers_from_mods(mods))

    # If this is an AIC, find the number of AIPs stored in it and index that
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            pass

    print('Indexing AIP info')
    # Delete ES index before creating new one if reingesting
    if 'REIN' in sip_type:
        print('Deleting outdated entry for AIP and AIP files with UUID', sip_uuid, 'from archival storage')
        elasticSearchFunctions.delete_aip(sip_uuid)
        elasticSearchFunctions.connect_and_delete_aip_files(sip_uuid)

    # Index AIP
    elasticSearchFunctions.connect_and_index_aip(
        sip_uuid,
        sip_name,
        aip_info['current_full_path'],
        mets_path,
        size=aip_info['size'],
        aips_in_aic=aips_in_aic,
        identifiers=identifiers)

    # Index AIP files
    print('Indexing AIP files')
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    exitCode = elasticSearchFunctions.connect_and_index_files(
        index='aips',
        type='aipfile',
        uuid=sip_uuid,
        pathToArchive=sip_path,
        identifiers=identifiers,
        sipName=sip_name,
    )
    if exitCode == 1:
        print('Error indexing AIP files', file=sys.stderr)
        return 1

    return 0
Example 12
def index_aip():
    """ Write AIP information to ElasticSearch. """
    sip_uuid = sys.argv[1]  # %SIPUUID%
    sip_name = sys.argv[2]  # %SIPName%
    sip_path = sys.argv[3]  # %SIPDirectory%
    sip_type = sys.argv[4]  # %SIPType%

    if not mcpclient_settings.SEARCH_ENABLED:
        logger.info('Skipping indexing: indexing is currently disabled.')
        return 0

    elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
    client = elasticSearchFunctions.get_client()

    print('SIP UUID:', sip_uuid)
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    print('AIP info:', aip_info)
    aip_info = aip_info[0]

    mets_name = 'METS.{}.xml'.format(sip_uuid)
    mets_path = os.path.join(sip_path, mets_name)

    identifiers = get_identifiers(sip_path)

    # If this is an AIC, find the number of AIPs stored in it and index that
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            pass

    print('Indexing AIP info')
    # Delete ES index before creating new one if reingesting
    if 'REIN' in sip_type:
        print('Deleting outdated entry for AIP and AIP files with UUID',
              sip_uuid, 'from archival storage')
        elasticSearchFunctions.delete_aip(client, sip_uuid)
        elasticSearchFunctions.delete_aip_files(client, sip_uuid)

    # Index AIP
    elasticSearchFunctions.index_aip(client,
                                     sip_uuid,
                                     sip_name,
                                     aip_info['current_full_path'],
                                     mets_path,
                                     size=aip_info['size'],
                                     aips_in_aic=aips_in_aic,
                                     identifiers=identifiers,
                                     encrypted=aip_info['encrypted'])

    # Index AIP files
    print('Indexing AIP files')
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    exitCode = elasticSearchFunctions.index_files(
        client,
        index='aips',
        type_='aipfile',
        uuid=sip_uuid,
        pathToArchive=sip_path,
        identifiers=identifiers,
        sipName=sip_name,
    )
    if exitCode == 1:
        print('Error indexing AIP files', file=sys.stderr)
        return 1

    return 0
Example 13
def index_from_aipstore(uuid):
    # check if uuid exists in the AIPstore
    file_info = storage_service.get_file_info(uuid=uuid)
    if len(file_info) != 1:
        print("Error: number of packages returned from aipstore: {}. Must be 1".format(len(file_info)))
        return -1
    # check if package_type is "AIP"
    print("file info: {}".format(file_info))
    if file_info[0]['package_type'] != 'AIP':
        print("Error: package is not AIP: {}".format(file_info[0]['package_type']))
        return -2

    # get AIP file name from file info
    basename = os.path.basename(file_info[0]['current_path'])
    filename, file_extension = os.path.splitext(basename)

    # get aip download url
    aip_download_url = storage_service.download_file_url(file_uuid=uuid)
    print("AIP download URL: {}".format(aip_download_url))

    # create a temp directory for processing
    tempdir = tempfile.mkdtemp(prefix='aiptmp', dir=TMP_DIR_BASE)
    print("Created: {}".format(tempdir))

    # download file to temp directory
    urllib.urlretrieve(aip_download_url, os.path.join(tempdir, basename))
    print("aip downloaded to directory")

    # expand aip files
    command_string = "atool --extract-to=. {}".format(basename)
    print("will execute: {}".format(command_string))
    p = subprocess.Popen(shlex.split(command_string),
                         cwd=tempdir,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    output = p.communicate()
    if p.returncode == 0:
        print('Successfully extracted AIP')
        print('\n'.join(output))
    else:
        print('Failed to extract AIP')
        print('\n'.join(output))

    # delete downloaded file now that we have it expanded
    os.remove(os.path.join(tempdir, basename))

    # get aip path to pass to the client script
    dirlist = glob.glob(os.path.join(tempdir, "*"))
    if len(dirlist) != 1:
        print("Error: {} must have only one directory".format(tempdir))
        return -4
    if not os.path.isdir(dirlist[0]):
        print("Error: {} must be a directory".format(dirlist[0]))
        return -4

    # populate the 4 variables needed to call the aip index script
    sip_uuid = uuid
    sip_name = filename[:-37]  # strip uuid and dashes
    sip_path = os.path.join(dirlist[0], "data")  # METS etc. live inside the AIP's data/ directory
    sip_type = "REIN"  # reingest, so existing index entries are removed beforehand

    command_string = "./indexAIP.py {} {} {} {}".format(sip_uuid, sip_name, sip_path, sip_type)
    print ("will execute: {}".format(command_string))


    p = subprocess.Popen(shlex.split(command_string),
                         cwd="/usr/lib/archivematica/MCPClient/clientScripts",
                         env={"DJANGO_SETTINGS_MODULE": "settings.common", 
                              "PYTHONPATH": "/usr/share/archivematica/dashboard:/usr/lib/archivematica/archivematicaCommon" },
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                    )
                    
    output = p.communicate()
    if p.returncode == 0:
        print('Successfully indexed AIP {0}'.format(sip_uuid))
        print('\n'.join(output))
    else:
        print('Failed to index AIP {0}'.format(sip_uuid))
        print('\n'.join(output))

    # delete temporary processing directory
    shutil.rmtree(tempdir)
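The Popen/communicate pattern above predates subprocess.run; a hedged Python 3 equivalent of the extraction step, still assuming the atool dependency:

def run_extract(basename, tempdir):
    """Hypothetical modernization of the extraction step above."""
    result = subprocess.run(
        ["atool", "--extract-to=.", basename],
        cwd=tempdir,
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        print("Successfully extracted AIP")
    else:
        print("Failed to extract AIP")
        print(result.stderr)
    return result.returncode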
Example 14
def processAIPThenDeleteMETSFile(path,
                                 temp_dir,
                                 es_client,
                                 delete_existing_data=False):
    archive_file = os.path.basename(path)

    # Regex match the UUID - AIP might end with .7z, .tar.bz2, or
    # something else.
    match = re.search(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
        archive_file)
    if match is not None:
        aip_uuid = match.group()
    else:
        return -1

    print("Processing AIP", aip_uuid)

    if delete_existing_data is True:
        print("Deleting AIP", aip_uuid, "from aips/aip and aips/aipfile.")
        elasticSearchFunctions.delete_aip(es_client, aip_uuid)
        elasticSearchFunctions.delete_aip_files(es_client, aip_uuid)

    # AIP filenames are <name>-<uuid><extension>
    # Index of match end is right before the extension
    subdir = archive_file[:match.end()]
    aip_name = subdir[:-37]
    mets_file = "METS." + aip_uuid + ".xml"
    mets_file_relative_path = os.path.join("data", mets_file)
    if os.path.isfile(path):
        mets_file_relative_path = os.path.join(subdir, mets_file_relative_path)
    path_to_mets = extract_file(
        archive_path=path,
        destination_dir=temp_dir,
        relative_path=mets_file_relative_path,
    )

    # If AIC, need to extract number of AIPs in AIC to index as well
    aips_in_aic = None
    root = etree.parse(path_to_mets)
    try:
        aip_type = ns.xml_find_premis(
            root,
            "mets:dmdSec/mets:mdWrap/mets:xmlData/dcterms:dublincore/dcterms:type"
        ).text
    except AttributeError:
        pass
    else:
        if aip_type == "Archival Information Collection":
            aips_in_aic = get_aips_in_aic(root, path, temp_dir)

    aip_info = storage_service.get_file_info(uuid=aip_uuid)

    if not aip_info:
        print("Information not found in Storage Service for AIP UUID: ",
              aip_uuid)
        return 1

    return elasticSearchFunctions.index_aip_and_files(
        client=es_client,
        uuid=aip_uuid,
        aip_stored_path=path,
        mets_staging_path=path_to_mets,
        name=aip_name,
        aip_size=aip_info[0]["size"],
        aips_in_aic=aips_in_aic,
        identifiers=[],  # TODO get these
    )
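extract_file is not shown in this listing; a minimal sketch of the single-member extraction it appears to perform, assuming a tar-based AIP (the real helper presumably also handles 7z and similar formats):

import tarfile


def extract_file(archive_path, destination_dir, relative_path):
    """Hypothetical sketch: extract one member from an AIP archive."""
    with tarfile.open(archive_path) as archive:
        archive.extract(relative_path, path=destination_dir)
    return os.path.join(destination_dir, relative_path)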
Example 15
def dspace_handle_to_archivesspace(job, sip_uuid):
    """Fetch the DSpace handle from the Storage Service and send to ArchivesSpace."""
    # Get association to ArchivesSpace if it exists
    try:
        digital_object = models.ArchivesSpaceDigitalObject.objects.get(
            sip_id=sip_uuid)
    except models.ArchivesSpaceDigitalObject.DoesNotExist:
        job.pyprint("SIP", sip_uuid,
                    "not associated with an ArchivesSpace component")
        return NO_ACTION
    job.pyprint(
        "Digital Object",
        digital_object.remoteid,
        "for SIP",
        digital_object.sip_id,
        "found",
    )
    logger.info(
        "Digital Object %s for SIP %s found",
        digital_object.remoteid,
        digital_object.sip_id,
    )

    # Get dspace handle from SS
    file_info = storage_service.get_file_info(uuid=sip_uuid)[0]
    try:
        handle = file_info["misc_attributes"]["handle"]
    except KeyError:
        job.pyprint("AIP has no DSpace handle stored")
        return NO_ACTION
    job.pyprint("DSpace handle:", handle)
    logger.info("DSpace handle: %s", handle)

    # POST Dspace handle to ArchivesSpace
    # Get ArchivesSpace config
    config = models.DashboardSetting.objects.get_dict(
        "upload-archivesspace_v0.0")
    archivesspace_url = config["base_url"]

    # Log in
    url = archivesspace_url + "/users/" + config["user"] + "/login"
    params = {"password": config["passwd"]}
    logger.debug("Log in to ArchivesSpace URL: %s", url)
    response = requests.post(
        url,
        params=params,
        timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT)
    logger.debug("Response: %s %s", response, response.content)
    session_id = response.json()["session"]
    headers = {"X-ArchivesSpace-Session": session_id}

    # Get Digital Object from ArchivesSpace
    url = archivesspace_url + digital_object.remoteid
    logger.debug("Get Digital Object info URL: %s", url)
    response = requests.get(
        url,
        headers=headers,
        timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT)
    logger.debug("Response: %s %s", response, response.content)
    body = response.json()

    # Update
    url = archivesspace_url + digital_object.remoteid
    file_version = {
        "file_uri": handle,
        "use_statement": config["use_statement"],
        "xlink_show_attribute": config["xlink_show"],
        "xlink_actuate_attribute": config["xlink_actuate"],
    }
    body["file_versions"].append(file_version)
    logger.debug("Modified Digital Object: %s", body)
    response = requests.post(
        url,
        headers=headers,
        json=body,
        timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT,
    )
    job.pyprint("Update response:", response, response.content)
    logger.debug("Response: %s %s", response, response.content)
    if response.status_code != 200:
        job.pyprint("Error updating", digital_object.remoteid)
        return ERROR
    return COMPLETED
Example 16
def processAIPThenDeleteMETSFile(path, temp_dir, es_client,
                                 delete_existing_data=False):
    archive_file = os.path.basename(path)

    # Regex match the UUID - AIP might end with .7z, .tar.bz2, or
    # something else.
    match = re.search(
        r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
        archive_file)
    if match is not None:
        aip_uuid = match.group()
    else:
        return -1

    print('Processing AIP', aip_uuid)

    if delete_existing_data is True:
        print('Deleting AIP', aip_uuid, 'from aips/aip and aips/aipfile.')
        elasticSearchFunctions.delete_aip(es_client, aip_uuid)
        elasticSearchFunctions.delete_aip_files(es_client, aip_uuid)

    # AIP filenames are <name>-<uuid><extension>
    # Index of match end is right before the extension
    subdir = archive_file[:match.end()]
    aip_name = subdir[:-37]
    mets_file = "METS." + aip_uuid + ".xml"
    mets_file_relative_path = os.path.join("data", mets_file)
    if os.path.isfile(path):
        mets_file_relative_path = os.path.join(subdir, mets_file_relative_path)
    path_to_mets = extract_file(
        archive_path=path,
        destination_dir=temp_dir,
        relative_path=mets_file_relative_path)

    # If AIC, need to extract number of AIPs in AIC to index as well
    aips_in_aic = None
    root = etree.parse(path_to_mets)
    try:
        aip_type = root.find(
            "m:dmdSec/m:mdWrap/m:xmlData/dc:dublincore/dc:type",
            namespaces=NSMAP).text
    except AttributeError:
        pass
    else:
        if aip_type == "Archival Information Collection":
            aips_in_aic = get_aips_in_aic(root, path, temp_dir)

    aip_info = storage_service.get_file_info(uuid=aip_uuid)

    if aip_info:
        elasticSearchFunctions.index_aip(
            client=es_client,
            uuid=aip_uuid,
            name=aip_name,
            filePath=path,
            pathToMETS=path_to_mets,
            aips_in_aic=aips_in_aic,
            identifiers=[],  # TODO get these
            size=aip_info[0]['size'],
        )
        elasticSearchFunctions.index_mets_file_metadata(
            client=es_client,
            uuid=aip_uuid,
            metsFilePath=path_to_mets,
            index='aips',
            type_='aipfile',
            sipName=aip_name,
            identifiers=[],  # TODO get these
        )
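The XPath query above relies on an NSMAP constant not shown here; a plausible sketch, assuming the prefixes map to the METS and Dublin Core terms namespaces used by the dcterms-based query in Example 14:

NSMAP = {
    "m": "http://www.loc.gov/METS/",
    "dc": "http://purl.org/dc/terms/",
}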
Example 17
def list_display(request):

    if "aips" not in settings.SEARCH_ENABLED:
        return render(request, "archival_storage/list.html")
    current_page_number = int(request.GET.get("page", 1))
    logger.debug("Current page: %s", current_page_number)

    # get count of AIP files
    es_client = elasticSearchFunctions.get_client()
    aip_indexed_file_count = aip_file_count(es_client)

    # get AIPs
    order_by = request.GET.get("order_by", "name")
    sort_by = request.GET.get("sort_by", "up")

    sort_params = "order_by=" + order_by + "&sort_by=" + sort_by

    # use raw subfield to sort by name
    if order_by == "name":
        order_by = order_by + ".raw"

    # change sort_by param to ES sort directions
    if sort_by == "down":
        sort_by = "desc"
    else:
        sort_by = "asc"

    sort_specification = order_by + ":" + sort_by

    # get list of UUIDs of AIPs that are deleted or pending deletion
    aips_deleted_or_pending_deletion = []
    should_haves = [{
        "match": {
            "status": "DEL_REQ"
        }
    }, {
        "match": {
            "status": "DELETED"
        }
    }]
    query = {"query": {"bool": {"should": should_haves}}}
    deleted_aip_results = es_client.search(body=query,
                                           index="aips",
                                           _source="uuid,status")
    for deleted_aip in deleted_aip_results["hits"]["hits"]:
        aips_deleted_or_pending_deletion.append(deleted_aip["_source"]["uuid"])

    # Fetch results and paginate
    def es_pager(page, page_size):
        """
        Fetch one page of normalized entries from Elasticsearch.

        :param page: 1-indexed page to fetch
        :param page_size: Number of entries on a page
        :return: List of dicts for each entry, where keys and values have been cleaned up
        """
        start = (page - 1) * page_size
        results = es_client.search(
            index="aips",
            body={"query": {
                "match_all": {}
            }},
            _source="origin,uuid,filePath,created,name,size,encrypted",
            sort=sort_specification,
            size=page_size,
            from_=start,
        )
        return [d["_source"] for d in results["hits"]["hits"]]

    items_per_page = 10
    count = es_client.count(index="aips", body={"query": {
        "match_all": {}
    }})["count"]
    results = LazyPagedSequence(es_pager,
                                page_size=items_per_page,
                                length=count)

    # Paginate
    page = helpers.pager(results, items_per_page, current_page_number)

    # process deletion, etc., and format results
    aips = []
    for aip in page.object_list:
        # If an AIP was deleted or is pending deletion, react if status changed
        if aip["uuid"] in aips_deleted_or_pending_deletion:
            # check with storage server to see current status
            api_results = storage_service.get_file_info(uuid=aip["uuid"])
            try:
                aip_status = api_results[0]["status"]
            except IndexError:
                # Storage service does not know about this AIP
                # TODO what should happen here?
                logger.info("AIP not found in storage service: {}".format(aip))
                continue

            # delete AIP metadata in ElasticSearch if AIP has been deleted from the
            # storage server
            # TODO: handle this asynchronously
            if aip_status == "DELETED":
                elasticSearchFunctions.delete_aip(es_client, aip["uuid"])
                elasticSearchFunctions.delete_aip_files(es_client, aip["uuid"])
            elif aip_status != "DEL_REQ":
                # update the status in ElasticSearch for this AIP
                elasticSearchFunctions.mark_aip_stored(es_client, aip["uuid"])
        else:
            aip_status = "UPLOADED"

        # Tweak AIP presentation and add to display array
        if aip_status != "DELETED":
            aip["status"] = AIP_STATUS_DESCRIPTIONS[aip_status]

            try:
                size = "{0:.2f} MB".format(float(aip["size"]))
            except (TypeError, ValueError):
                size = "Removed"

            aip["size"] = size

            aip["href"] = aip["filePath"].replace(AIPSTOREPATH + "/",
                                                  "AIPsStore/")
            aip["date"] = aip["created"]

            aips.append(aip)

    total_size = total_size_of_aips(es_client)
    # Find out which AIPs are encrypted

    return render(
        request,
        "archival_storage/list.html",
        {
            "total_size": total_size,
            "aip_indexed_file_count": aip_indexed_file_count,
            "aips": aips,
            "page": page,
            "search_params": sort_params,
        },
    )
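The ".raw" sort above works only if the index mapping defines a keyword sub-field for name; a hypothetical mapping fragment showing the assumed shape:

AIP_NAME_MAPPING = {
    "name": {
        "type": "text",
        # Analyzed text cannot be sorted directly, so a raw keyword
        # sub-field is assumed for the order_by + ".raw" sort above.
        "fields": {"raw": {"type": "keyword"}},
    }
}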
Example 18
def store_aip(job, aip_destination_uri, aip_path, sip_uuid, sip_name,
              sip_type):
    """ Stores an AIP with the storage service.

    aip_destination_uri = storage service destination URI, should be of purpose
        AIP Store (AS)
    aip_path = Full absolute path to the AIP's current location on the local
        filesystem
    sip_uuid = UUID of the SIP, which will become the UUID of the AIP
    sip_name = SIP name.  Not used directly, but part of the AIP name
    sip_type = SIP type, e.g. SIP or AIC; determines the package type and
        whether this is a reingest ('REIN' in sip_type)

    Example inputs:
    storeAIP.py
        "/api/v1/location/9c2b5bb7-abd6-477b-88e0-57107219dace/"
        "/var/archivematica/sharedDirectory/currentlyProcessing/ep6-0737708e-9b99-471a-b331-283e2244164f/ep6-0737708e-9b99-471a-b331-283e2244164f.7z"
        "0737708e-9b99-471a-b331-283e2244164f"
        "ep6"
    """

    # FIXME Assume current Location is the one set up by default until location
    # is passed in properly, or use Agent to make sure is correct CP
    current_location = storage_service.get_location(purpose="CP")[0]

    # If ``aip_path`` does not exist, this may be a DIP that was not uploaded.
    # In that case, it will be in the uploadDIP/ directory instead of the
    # uploadedDIPs/ directory.
    if not os.path.exists(aip_path):
        aip_path = get_upload_dip_path(aip_path)

    # Make aip_path relative to the Location
    shared_path = os.path.join(current_location["path"],
                               "")  # Ensure ends with /
    relative_aip_path = aip_path.replace(shared_path, "")

    # Get the package type: AIP, AIC or DIP
    if "SIP" in sip_type or "AIP" in sip_type:  # Also matches AIP-REIN
        package_type = "AIP"
    elif "AIC" in sip_type:  # Also matches AIC-REIN
        package_type = "AIC"
    elif "DIP" in sip_type:
        package_type = "DIP"

    # Uncompressed directory AIPs must be terminated in a /,
    # otherwise the storage service will place the directory
    # inside another directory of the same name.
    current_path = os.path.basename(aip_path)
    if os.path.isdir(aip_path) and not aip_path.endswith("/"):
        relative_aip_path = relative_aip_path + "/"

    # DIPs cannot share the AIP UUID, as the storage service depends on
    # having a unique UUID; assign a new one before uploading.
    # TODO allow mapping the AIP UUID to the DIP UUID for retrieval.
    related_package_uuid = None
    if sip_type == "DIP":
        uuid = str(uuid4())
        job.pyprint(
            "Checking if DIP {} parent AIP has been created...".format(uuid))

        # Set related package UUID, so a relationship to the parent AIP can
        # be created if the AIP has been stored. If the AIP hasn't yet been
        # stored, take note of the DIP's UUID so the relationship can later
        # be created when the AIP is stored.
        try:
            storage_service.get_file_info(uuid=sip_uuid)[0]  # Check existence
            related_package_uuid = sip_uuid
            job.pyprint("Parent AIP exists so relationship can be created.")
        except IndexError:
            UnitVariable.objects.create(
                unittype="SIP",
                unituuid=sip_uuid,
                variable="relatedPackage",
                variablevalue=uuid,
            )
            job.pyprint(
                "Noting DIP UUID {} related to AIP so relationship can be created when AIP is stored."
                .format(uuid))
    else:
        uuid = sip_uuid
        try:
            related_package = UnitVariable.objects.get(
                unituuid=sip_uuid, variable="relatedPackage")
        except UnitVariable.DoesNotExist:
            pass
        else:
            related_package_uuid = related_package.variablevalue

    # If AIP is a directory, calculate size recursively
    if os.path.isdir(aip_path):
        size = 0
        for dirpath, _, filenames in os.walk(aip_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                size += os.path.getsize(file_path)
    else:
        size = os.path.getsize(aip_path)

    # Get the AIP subtype from any DC type attribute supplied by the user for
    # the AIP. If found, this will replace 'Archival Information Package' in
    # ``<mets:div TYPE='Archival Information Package'>`` in the pointer file.
    sip_metadata_uuid = "3e48343d-e2d2-4956-aaa3-b54d26eb9761"
    try:
        dc = DublinCore.objects.get(metadataappliestotype_id=sip_metadata_uuid,
                                    metadataappliestoidentifier=uuid)
    except DublinCore.DoesNotExist:
        aip_subtype = "Archival Information Package"
    else:
        aip_subtype = dc.type

    # Store the AIP
    try:
        new_file = _create_file(
            uuid,
            current_location,
            relative_aip_path,
            aip_destination_uri,
            current_path,
            package_type,
            aip_subtype,
            size,
            sip_type,
            related_package_uuid,
        )
    except StorageServiceCreateFileError as err:
        errmsg = "{} creation failed: {}.".format(sip_type, err)
        logger.warning(errmsg)
        raise Exception(errmsg + " See logs for more details.")

    message = "Storage Service created {}:\n{}".format(sip_type,
                                                       pformat(new_file))
    logger.info(message)
    job.pyprint(message)

    # Once the DIP is stored, remove it from the uploadDIP watched directory as
    # it will no longer need to be referenced from there by the user or the
    # system.
    rmtree_upload_dip_transitory_loc(package_type, aip_path)
    return 0
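_create_file and StorageServiceCreateFileError are not defined in this listing; a sketch of the wrapper, inferred from the older store_aip in Example 10, which calls storage_service.create_file directly and inspects its (new_file, error_msg) return:

class StorageServiceCreateFileError(Exception):
    """Hypothetical error type raised by the sketch below."""


def _create_file(uuid, current_location, relative_aip_path,
                 aip_destination_uri, current_path, package_type,
                 aip_subtype, size, sip_type, related_package_uuid):
    """Hypothetical sketch bridging this version and Example 10."""
    (new_file, error_msg) = storage_service.create_file(
        uuid=uuid,
        origin_location=current_location["resource_uri"],
        origin_path=relative_aip_path,
        current_location=aip_destination_uri,
        current_path=current_path,
        package_type=package_type,
        aip_subtype=aip_subtype,
        size=size,
        update="REIN" in sip_type,
        related_package_uuid=related_package_uuid,
        events=get_events_from_db(uuid),
        agents=get_agents_from_db(uuid),
    )
    if new_file is None or new_file.get("status", "") == "FAIL":
        raise StorageServiceCreateFileError(
            error_msg or "Package status: Failed")
    return new_file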