Example #1
def sync_es_aip_status_with_storage_service(uuid, es_status):
    """Update AIP's status in ES indices to match Storage Service.

    This is a bit of a kludge that is made necessary by the fact that
    the Storage Service does not update ElasticSearch directly when
    a package's status has changed.

    Updates to ES are visible in Archival Storage after running a new
    search or refreshing the page.

    :param uuid: AIP UUID.
    :param es_status: Current package status in ES.

    :returns: Boolean indicating whether AIP should be kept in search
    results (i.e. has not been deleted from Storage Service).
    """
    keep_in_results = True

    amclient = setup_amclient()
    amclient.package_uuid = uuid
    api_results = amclient.get_package_details()

    if api_results in AMCLIENT_ERROR_CODES:
        logger.warning(
            "Package {} not found in Storage Service. AMClient error code: {}".format(
                uuid, api_results
            )
        )
        return keep_in_results

    aip_status = api_results.get("status")

    if not aip_status:
        logger.warning(
            "Status for package {} could not be retrived from Storage Service."
        )
        return keep_in_results

    if (
        aip_status == es.STATUS_DELETE_REQUESTED
        and es_status != es.STATUS_DELETE_REQUESTED
    ):
        es_client = es.get_client()
        es.mark_aip_deletion_requested(es_client, uuid)
    elif aip_status == es.STATUS_UPLOADED and es_status != es.STATUS_UPLOADED:
        es_client = es.get_client()
        es.revert_aip_deletion_request(es_client, uuid)
    elif aip_status == es.STATUS_DELETED:
        keep_in_results = False
        es_client = es.get_client()
        es.delete_aip(es_client, uuid)
        es.delete_aip_files(es_client, uuid)

    return keep_in_results
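The boolean return value lends itself to filtering search hits. The following is a minimal caller sketch, assuming a standard Elasticsearch hits list whose documents carry "uuid" and "status" fields; the wrapper function itself is hypothetical.

def filter_deleted_aips(hits):
    """Keep only hits whose AIPs still exist in the Storage Service."""
    kept = []
    for hit in hits:
        source = hit["_source"]
        if sync_es_aip_status_with_storage_service(
            source["uuid"], source.get("status")
        ):
            kept.append(hit)
    return kept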
Example #2
def aip_file_download(request, uuid):
    es_client = es.get_client()

    # get AIP file properties
    aipfile = es.get_aipfile_data(es_client, uuid, fields="filePath,FILEUUID,AIPUUID")

    # get file's AIP's properties
    sipuuid = aipfile["_source"]["AIPUUID"]
    aip = es.get_aip_data(
        es_client, sipuuid, fields="uuid,name,filePath,size,origin,created"
    )
    aip_filepath = aip["_source"]["filePath"]

    # work out path components
    aip_archive_filename = os.path.basename(aip_filepath)

    # splitext doesn't deal with double extensions, so special-case .tar.bz2
    if aip_archive_filename.endswith(".tar.bz2"):
        subdir = aip_archive_filename[:-8]
    else:
        subdir = os.path.splitext(aip_archive_filename)[0]

    file_relative_path = os.path.join(subdir, "data", aipfile["_source"]["filePath"])

    redirect_url = storage_service.extract_file_url(
        aip["_source"]["uuid"], file_relative_path
    )
    return helpers.stream_file_from_storage_service(
        redirect_url, "Storage service returned {}; check logs?"
    )
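Example #3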
    def handle(self, *args, **options):
        """Entry point of the rebuild_transfer_backlog command."""
        if not self.confirm(options['no_prompt']):
            sys.exit(0)

        transfer_backlog_dir = self.prepdir(options['transfer_backlog_dir'])
        if not os.path.exists(transfer_backlog_dir):
            raise CommandError('Directory does not exist: %s'
                               % transfer_backlog_dir)
        self.success('Rebuilding "transfers" index from {}.'.format(
            transfer_backlog_dir))

        # Connect to Elasticsearch.
        elasticSearchFunctions.setup_reading_from_conf(django_settings)
        es_client = elasticSearchFunctions.get_client()
        try:
            es_info = es_client.info()
        except Exception as err:
            raise CommandError("Unable to connect to Elasticsearch: %s" % err)
        else:
            self.success('Connected to Elasticsearch node {} (v{}).'.format(
                es_info['name'], es_info['version']['number']))

        self.delete_index(es_client)
        self.create_index(es_client)
        self.populate_index(es_client, transfer_backlog_dir)
        self.success('Indexing complete!')
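Example #4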
def call(jobs):
    with transaction.atomic():
        for job in jobs:
            with job.JobContext(logger=logger):
                if 'transfers' not in mcpclient_settings.SEARCH_ENABLED:
                    logger.info(
                        'Skipping indexing: Transfers indexing is currently disabled.'
                    )
                    job.set_status(0)
                    continue

                transfer_path = job.args[1]
                transfer_uuid = job.args[2]
                try:
                    status = job.args[3]
                except IndexError:
                    status = ''

                elasticSearchFunctions.setup_reading_from_conf(
                    mcpclient_settings)
                client = elasticSearchFunctions.get_client()
                job.set_status(
                    elasticSearchFunctions.index_transfer_and_files(
                        client,
                        transfer_uuid,
                        transfer_path,
                        status=status,
                        printfn=job.pyprint,
                    ))
Example #5
    def put(self, request, fileuuid):
        """
        Requires a file UUID, and document body must be a JSON-encoded list.
        Replaces the list of tags in the record with the provided list.
        Returns {"success": true} on success.
        Returns 400 if no JSON document is provided in the request body, if
        the body can't be decoded, or if the body is any JSON object other
        than a list.
        """
        try:
            tags = json.load(request)
        except ValueError:
            response = {
                "success": False,
                "message": "No JSON document could be decoded from the request.",
            }
            return helpers.json_response(response, status_code=400)
        if not isinstance(tags, list):
            response = {
                "success": False,
                "message": "The request body must be an array.",
            }
            return helpers.json_response(response, status_code=400)

        try:
            es_client = elasticSearchFunctions.get_client()
            elasticSearchFunctions.set_file_tags(es_client, fileuuid, tags)
        except elasticSearchFunctions.ElasticsearchError as e:
            response = {"success": False, "message": str(e)}
            if isinstance(e, elasticSearchFunctions.EmptySearchResultError):
                status_code = 404
            else:
                status_code = 400
            return helpers.json_response(response, status_code=status_code)
        return helpers.json_response({"success": True})
Example #6
def file_details(request, fileuuid):
    try:
        es_client = elasticSearchFunctions.get_client()
        source = elasticSearchFunctions.get_transfer_file_info(
            es_client, 'fileuuid', fileuuid)
    except elasticSearchFunctions.ElasticsearchError as e:
        message = str(e)
        response = {
            'success': False,
            'message': message,
        }
        if 'no exact results' in message:
            status_code = 404
        else:
            status_code = 500
        return helpers.json_response(response, status_code=status_code)

    format_info = source.get('format', [{}])[0]
    record = {
        'id': source['fileuuid'],
        'type': 'file',
        'title': source['filename'],
        'size': source['size'],
        'bulk_extractor_reports': source.get('bulk_extractor_reports', []),
        'tags': source.get('tags', []),
        'format': format_info.get('format'),
        'group': format_info.get('group'),
        'puid': format_info.get('puid'),
    }
    return helpers.json_response(record)
Example #7
def file_details(request, fileuuid):
    try:
        es_client = elasticSearchFunctions.get_client()
        source = elasticSearchFunctions.get_transfer_file_info(
            es_client, "fileuuid", fileuuid
        )
    except elasticSearchFunctions.ElasticsearchError as e:
        message = str(e)
        response = {"success": False, "message": message}
        if "no exact results" in message:
            status_code = 404
        else:
            status_code = 500
        return helpers.json_response(response, status_code=status_code)

    format_info = source.get("format", [{}])[0]
    record = {
        "id": source["fileuuid"],
        "type": "file",
        "title": source["filename"],
        "size": source["size"],
        "bulk_extractor_reports": source.get("bulk_extractor_reports", []),
        "tags": source.get("tags", []),
        "format": format_info.get("format"),
        "group": format_info.get("group"),
        "puid": format_info.get("puid"),
    }
    return helpers.json_response(record)
Example #8
def aip_file_download(request, uuid):
    # get file basename
    file = models.File.objects.get(uuid=uuid)
    file_basename = os.path.basename(file.currentlocation)

    # get file's AIP's properties
    sipuuid = helpers.get_file_sip_uuid(uuid)
    es_client = elasticSearchFunctions.get_client()
    aip = elasticSearchFunctions.get_aip_data(es_client, sipuuid, fields='uuid,name,filePath,size,origin,created')
    aip_filepath = aip['fields']['filePath'][0]

    # work out path components
    aip_archive_filename = os.path.basename(aip_filepath)

    # splitext doesn't deal with double extensions, so special-case .tar.bz2
    if aip_archive_filename.endswith('.tar.bz2'):
        subdir = aip_archive_filename[:-8]
    else:
        subdir = os.path.splitext(aip_archive_filename)[0]

    # Strip %Directory% from the path
    path_to_file_within_aip_data_dir = os.path.dirname(file.currentlocation.replace('%transferDirectory%', '').replace('%SIPDirectory%', ''))

    file_relative_path = os.path.join(
        subdir,
        'data',
        path_to_file_within_aip_data_dir,
        file_basename
    )

    redirect_url = storage_service.extract_file_url(aip['fields']['uuid'][0], file_relative_path)
    return helpers.stream_file_from_storage_service(redirect_url, 'Storage service returned {}; check logs?')
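The ".tar.bz2" special case above generalizes to any known double extension. Below is a small sketch of one way to factor it out; the extension list is illustrative, not exhaustive.

import os

DOUBLE_EXTENSIONS = (".tar.bz2", ".tar.gz")


def strip_archive_extension(filename):
    """Return the AIP directory name implied by an archive filename."""
    for ext in DOUBLE_EXTENSIONS:
        if filename.endswith(ext):
            return filename[: -len(ext)]
    # os.path.splitext handles single extensions such as ".7z" or ".zip".
    return os.path.splitext(filename)[0]


# strip_archive_extension("aip-1234.tar.bz2")  -> "aip-1234"
# strip_archive_extension("aip-1234.7z")       -> "aip-1234"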
Example #9
def setup_es_for_aip_reindexing(cmd, delete_all=False):
    """Setup for reindexing AIPs.

    :param cmd: Command object.
    :param delete_all: Optional arg to delete AIP indices.

    :returns: ES client.
    """
    if es.AIPS_INDEX not in django_settings.SEARCH_ENABLED:
        raise CommandError(
            "The AIPs indexes are not enabled. Please, make sure to "
            "set the *_SEARCH_ENABLED environment variables to `true` "
            "to enable the AIPs and Transfers indexes, or to `aips` "
            "to only enable the AIPs indexes.")

    try:
        es.setup_reading_from_conf(django_settings)
        es_client = es.get_client()
    except ElasticsearchException as err:
        raise CommandError(
            "Unable to connect to Elasticsearch: %s".format(err))

    if delete_all:
        cmd.info("Deleting all AIPs in the 'aips' and 'aipfiles' indices")
        time.sleep(3)  # Time for the user to panic and kill the process.
        indices = [es.AIPS_INDEX, es.AIP_FILES_INDEX]
        es_client.indices.delete(",".join(indices), ignore=404)
        es.create_indexes_if_needed(es_client, indices)

    return es_client
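A hedged sketch of how a management command might call this helper from its handle() method; the option name is an assumption for illustration.

    def handle(self, *args, **options):
        # `self` is the Command object the helper uses for logging.
        es_client = setup_es_for_aip_reindexing(
            self, delete_all=options.get("delete_all", False)
        )
        # ... reindex AIPs using es_client ...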
Example #10
def delete(request, uuid):
    """
    Request deletion of a package from a backlog transfer

    :param request: The Django request object
    :param uuid: The UUID of the package requested for deletion.
    :return: Redirects the user back to the backlog page
    """
    try:
        reason_for_deletion = request.POST.get("reason_for_deletion", "")
        response = storage_service.request_file_deletion(
            uuid, request.user.id, request.user.email, reason_for_deletion
        )

        messages.info(request, response["message"])
        es_client = es.get_client()
        es.mark_backlog_deletion_requested(es_client, uuid)

    except requests.exceptions.ConnectionError:
        messages.warning(
            request,
            _(
                "Unable to connect to storage server. Please contact your administrator."
            ),
        )
    except requests.exceptions.RequestException:
        raise Http404

    return redirect("backlog:backlog_index")
Example #11
def create_aic(request, *args, **kwargs):
    aic_form = forms.CreateAICForm(request.POST or None)
    if aic_form.is_valid():
        aip_uuids = ast.literal_eval(aic_form.cleaned_data['results'])
        logger.info("AIC AIP UUIDs: {}".format(aip_uuids))

        # The form was passed a raw list of all AIP UUIDs matching the user's query;
        # use those to fetch their names, which are used to produce files below.
        query = {
            "query": {
                "terms": {
                    "uuid": aip_uuids,
                }
            }
        }
        es_client = elasticSearchFunctions.get_client()
        results = es_client.search(
            body=query,
            index='aips',
            doc_type='aip',
            fields='uuid,name',
            size=elasticSearchFunctions.MAX_QUERY_SIZE,  # return all records
        )

        # Create files in staging directory with AIP information
        shared_dir = helpers.get_server_config_value('sharedDirectory')
        staging_dir = os.path.join(shared_dir, 'tmp')

        # Create SIP (AIC) directory in staging directory
        temp_uuid = str(uuid.uuid4())
        destination = os.path.join(staging_dir, temp_uuid)
        try:
            os.mkdir(destination)
            os.chmod(destination, 0o770)
        except os.error:
            messages.error(request, "Error creating AIC")
            logger.exception(
                "Error creating AIC: Error creating directory {}".format(
                    destination))
            return redirect('archival_storage_index')

        # Create SIP in DB
        mcp_destination = destination.replace(shared_dir, '%sharedPath%') + '/'
        databaseFunctions.createSIP(mcp_destination,
                                    UUID=temp_uuid,
                                    sip_type='AIC')

        # Create files with filename = AIP UUID, and contents = AIP name
        for aip in results['hits']['hits']:
            filepath = os.path.join(destination, aip['fields']['uuid'][0])
            with open(filepath, 'w') as f:
                os.chmod(filepath, 0o660)
                f.write(str(aip['fields']['name'][0]))

        return redirect('components.ingest.views.aic_metadata_add', temp_uuid)
    else:
        messages.error(request, "Error creating AIC")
        logger.error("Error creating AIC: Form not valid: {}".format(aic_form))
        return redirect('archival_storage_index')
Example #12
def _document_json_response(document_id_modified, index):
    document_id = document_id_modified.replace("____", "-")
    es_client = elasticSearchFunctions.get_client()
    data = es_client.get(index=index,
                         doc_type=elasticSearchFunctions.DOC_TYPE,
                         id=document_id)
    pretty_json = json.dumps(data, sort_keys=True, indent=2)
    return HttpResponse(pretty_json, content_type="application/json")
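For reference, a minimal sketch of the inverse substitution, which a caller could use to build a URL-safe document ID; the helper name is hypothetical.

def document_id_to_url_fragment(document_id):
    """Encode an Elasticsearch document ID for use in a URL path."""
    # e.g. "a1b2-c3d4" -> "a1b2____c3d4"; the view above reverses this.
    return document_id.replace("-", "____")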
Example #13
def post_store_hook(job, sip_uuid):
    """
    Hook for doing any work after an AIP is stored successfully.
    """
    update_es = "transfers" in mcpclient_settings.SEARCH_ENABLED
    if update_es:
        elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
        client = elasticSearchFunctions.get_client()
    else:
        logger.info(
            "Skipping indexing: Transfers indexing is currently disabled.")

    # SIP ARRANGEMENT

    # Mark files in this SIP as in an AIP (aip_created)
    file_uuids = models.File.objects.filter(sip=sip_uuid).values_list(
        "uuid", flat=True)
    models.SIPArrange.objects.filter(file_uuid__in=file_uuids).update(
        aip_created=True)

    # Check if any of the component transfers are completely stored
    # TODO Storage service should index AIPs, knows when to update ES
    transfer_uuids = set(
        models.SIPArrange.objects.filter(file_uuid__in=file_uuids).values_list(
            "transfer_uuid", flat=True))
    for transfer_uuid in transfer_uuids:
        job.pyprint("Checking if transfer", transfer_uuid,
                    "is fully stored...")
        arranged_uuids = set(
            models.SIPArrange.objects.filter(
                transfer_uuid=transfer_uuid).filter(
                    aip_created=True).values_list("file_uuid", flat=True))
        backlog_uuids = set(
            models.File.objects.filter(transfer=transfer_uuid).values_list(
                "uuid", flat=True))
        # If all backlog UUIDs have been arranged
        if arranged_uuids == backlog_uuids:
            job.pyprint(
                "Transfer",
                transfer_uuid,
                "fully stored, sending delete request to storage service, deleting from transfer backlog",
            )
            # Submit delete req to SS (not actually delete), remove from ES
            storage_service.request_file_deletion(
                uuid=transfer_uuid,
                user_id=0,
                user_email="archivematica system",
                reason_for_deletion="All files in Transfer are now in AIPs.",
            )
            if update_es:
                elasticSearchFunctions.remove_sip_transfer_files(
                    client, transfer_uuid)

    # DSPACE HANDLE TO ARCHIVESSPACE
    dspace_handle_to_archivesspace(job, sip_uuid)

    # POST-STORE CALLBACK
    storage_service.post_store_aip_callback(sip_uuid)
Example #14
def download_by_uuid(request, uuid, preview_file=False):
    """Download a file from the Storage Service, given its UUID.

    This view will stream the response directly from the storage service,
    so, unlike download_ss, this will work even if the Storage Service is
    not accessible to the requestor.

    It looks up the full relative path in the ``transferfiles`` search index.
    ``relative_path`` includes the ``data`` directory when the transfer package
    uses the BagIt format.

    Returns 404 if a file with the requested UUID cannot be found. Otherwise
    the status code is returned via the call to
    ``stream_file_from_storage_service``.

    ``preview_file`` is an instruction to be applied to the response headers
    to enable the file to be seen inside the browser if it is capable of being
    rendered. On receiving this instruction, the content-disposition header
    will be set in the stream_file_from_storage_service to 'inline'.
    """
    not_found_err = helpers.json_response(
        {
            "success": False,
            "message": _("File with UUID %(uuid)s " "could not be found")
            % {"uuid": uuid},
        },
        status_code=404,
    )

    try:
        record = elasticSearchFunctions.get_transfer_file_info(
            elasticSearchFunctions.get_client(), "fileuuid", uuid
        )
    except elasticSearchFunctions.ElasticsearchError:
        return not_found_err

    try:
        transfer_id, relpath = record["sipuuid"], record["relative_path"]
    except KeyError:
        logger.debug("Search document is missing required parameters")
        return not_found_err

    # E.g. from "<name>-<uuid>/data/objects/bird.mp3" we only need the path
    # component not including transfer name or UUID.
    try:
        relpath = relpath.split("/", 1)[1]
    except IndexError:
        logger.debug(
            "Relative path in search document has an unexpected form: %s", relpath
        )
        return not_found_err

    redirect_url = storage_service.extract_file_url(transfer_id, relpath)
    return helpers.stream_file_from_storage_service(
        redirect_url, "Storage service returned {}; check logs?", preview_file
    )
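A quick illustration of the relative-path handling described above; the sample path is made up.

relpath = "birds-2a5e0bf7/data/objects/bird.mp3"  # hypothetical index value
print(relpath.split("/", 1)[1])  # -> "data/objects/bird.mp3"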
Example #15
def execute(request):
    """
    Remove any deleted transfers from ES and render main backlog page.

    :param request: The Django request object
    :return: The main backlog page rendered
    """
    es_client = elasticSearchFunctions.get_client()
    check_and_remove_deleted_transfers(es_client)
    return render(request, 'backlog/backlog.html', locals())
Example #16
def _index_transfer(job, transfer_id, transfer_path, size):
    """Index the transfer and its files in Elasticsearch."""
    if "transfers" not in mcpclient_settings.SEARCH_ENABLED:
        logger.info("Skipping indexing:" " Transfers indexing is currently disabled.")
        return
    elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
    client = elasticSearchFunctions.get_client()
    elasticSearchFunctions.index_transfer_and_files(
        client, transfer_id, transfer_path, size, printfn=job.pyprint
    )
Example #17
def index_aip(job):
    """Write AIP information to ElasticSearch. """
    sip_uuid = job.args[1]  # %SIPUUID%
    sip_name = job.args[2]  # %SIPName%
    sip_staging_path = job.args[3]  # %SIPDirectory%
    sip_type = job.args[4]  # %SIPType%
    if "aips" not in mcpclient_settings.SEARCH_ENABLED:
        logger.info("Skipping indexing: AIPs indexing is currently disabled.")
        return 0
    elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
    client = elasticSearchFunctions.get_client()
    aip_info = storage_service.get_file_info(uuid=sip_uuid)
    job.pyprint("AIP info:", aip_info)
    aip_info = aip_info[0]
    mets_staging_path = os.path.join(sip_staging_path,
                                     "METS.{}.xml".format(sip_uuid))
    identifiers = get_identifiers(job, sip_staging_path)
    # If this is an AIC, find the number of AIPs stored in it and index that
    aips_in_aic = None
    if sip_type == "AIC":
        try:
            uv = UnitVariable.objects.get(unittype="SIP",
                                          unituuid=sip_uuid,
                                          variable="AIPsinAIC")
            aips_in_aic = uv.variablevalue
        except UnitVariable.DoesNotExist:
            pass
    # Delete ES index before creating new one if reingesting
    if "REIN" in sip_type:
        job.pyprint(
            "Deleting outdated entry for AIP and AIP files with UUID",
            sip_uuid,
            "from archival storage",
        )
        elasticSearchFunctions.delete_aip(client, sip_uuid)
        elasticSearchFunctions.delete_aip_files(client, sip_uuid)
    job.pyprint("Indexing AIP and AIP files")
    # Even though we treat MODS identifiers as SIP-level, we need to index them
    # here because the archival storage tab actually searches on the
    # aips/aipfile index.
    ret = elasticSearchFunctions.index_aip_and_files(
        client=client,
        uuid=sip_uuid,
        aip_stored_path=aip_info["current_full_path"],
        mets_staging_path=mets_staging_path,
        name=sip_name,
        aip_size=aip_info["size"],
        aips_in_aic=aips_in_aic,
        identifiers=identifiers,
        encrypted=aip_info["encrypted"],
        printfn=job.pyprint,
    )
    if ret == 1:
        job.pyprint("Error indexing AIP and AIP files", file=sys.stderr)
    return ret
Example #18
def execute(request):
    """
    Remove any deleted transfers from ES and render main backlog page.

    :param request: The Django request object
    :return: The main backlog page rendered
    """
    if "transfers" in django_settings.SEARCH_ENABLED:
        es_client = elasticSearchFunctions.get_client()
        check_and_remove_deleted_transfers(es_client)
    return render(request, "backlog/backlog.html", locals())
Example #19
def create_aic(request, *args, **kwargs):
    aic_form = forms.CreateAICForm(request.POST or None)
    if aic_form.is_valid():
        aip_uuids = ast.literal_eval(aic_form.cleaned_data["results"])
        logger.info("AIC AIP UUIDs: {}".format(aip_uuids))

        # The form was passed a raw list of all AIP UUIDs matching the user's query;
        # use those to fetch their names, which are used to produce files below.
        query = {"query": {"terms": {"uuid": aip_uuids}}}
        es_client = elasticSearchFunctions.get_client()
        results = es_client.search(
            body=query,
            index="aips",
            _source="uuid,name",
            size=elasticSearchFunctions.MAX_QUERY_SIZE,  # return all records
        )

        # Create files in staging directory with AIP information
        shared_dir = settings.SHARED_DIRECTORY
        staging_dir = os.path.join(shared_dir, "tmp")

        # Create SIP (AIC) directory in staging directory
        temp_uuid = str(uuid.uuid4())
        destination = os.path.join(staging_dir, temp_uuid)
        try:
            os.mkdir(destination)
            os.chmod(destination, 0o770)
        except os.error:
            messages.error(request, "Error creating AIC")
            logger.exception(
                "Error creating AIC: Error creating directory {}".format(
                    destination))
            return redirect("archival_storage_index")

        # Create SIP in DB
        mcp_destination = destination.replace(shared_dir, "%sharedPath%") + "/"
        databaseFunctions.createSIP(mcp_destination,
                                    UUID=temp_uuid,
                                    sip_type="AIC")

        # Create files with filename = AIP UUID, and contents = AIP name
        for aip in results["hits"]["hits"]:
            filepath = os.path.join(destination, aip["_source"]["uuid"])
            with open(filepath, "w") as f:
                os.chmod(filepath, 0o660)
                f.write(str(aip["_source"]["name"]))

        return redirect("components.ingest.views.aic_metadata_add", temp_uuid)
    else:
        messages.error(request, "Error creating AIC")
        logger.error("Error creating AIC: Form not valid: {}".format(aic_form))
        return redirect("archival_storage_index")
Example #20
def call(jobs):
    for job in jobs:
        with job.JobContext(logger=logger):
            aip_uuid = job.args[1]

            if "aips" not in mcpclient_settings.SEARCH_ENABLED:
                logger.info("Skipping. AIPs indexing is currently disabled.")
                return

            elasticSearchFunctions.setup_reading_from_conf(mcpclient_settings)
            client = elasticSearchFunctions.get_client()

            logger.info("Removing indexed files for AIP %s...", aip_uuid)
            elasticSearchFunctions.delete_aip_files(client, aip_uuid)
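Example #21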
    def handle(self, *args, **options):
        """Entry point of the rebuild_transfer_backlog command."""
        # Check that the `transfers` part of the search is enabled
        if es.TRANSFERS_INDEX not in django_settings.SEARCH_ENABLED:
            print(
                "The Transfers indexes are not enabled. Please, make sure to "
                "set the *_SEARCH_ENABLED environment variables to `true` "
                "to enable the Transfers and AIPs indexes, or to `transfers` "
                "to only enable the Transfers indexes.")
            sys.exit(1)

        if not self.confirm(options["no_prompt"]):
            sys.exit(0)

        # Ignore elasticsearch-py logging events unless they're errors.
        logging.getLogger("elasticsearch").setLevel(logging.ERROR)
        logging.getLogger("archivematica.common").setLevel(logging.ERROR)

        transfer_backlog_dir = self.prepdir(options["transfer_backlog_dir"])
        if options["from_storage_service"]:
            self.info(
                'Rebuilding "transfers" index from packages in Storage Service.'
            )
        else:
            if not os.path.exists(transfer_backlog_dir):
                raise CommandError("Directory does not exist: %s",
                                   transfer_backlog_dir)
            self.info('Rebuilding "transfers" index from {}.'.format(
                transfer_backlog_dir))

        # Connect to Elasticsearch.
        es.setup_reading_from_conf(django_settings)
        es_client = es.get_client()
        try:
            es_info = es_client.info()
        except Exception as err:
            raise CommandError("Unable to connect to Elasticsearch: %s" % err)
        else:
            self.info("Connected to Elasticsearch node {} (v{}).".format(
                es_info["name"], es_info["version"]["number"]))

        indexes = [es.TRANSFERS_INDEX, es.TRANSFER_FILES_INDEX]
        self.delete_indexes(es_client, indexes)
        self.create_indexes(es_client, indexes)

        if options["from_storage_service"]:
            pipeline_uuid = options["pipeline"]
            self.populate_data_from_storage_service(es_client, pipeline_uuid)
        else:
            self.populate_data_from_files(es_client, transfer_backlog_dir)
Example #22
def aip_mets_file_download(request, uuid):
    """Download an individual AIP METS file."""
    es_client = es.get_client()
    try:
        aip = es.get_aip_data(es_client, uuid, fields="name")
    except IndexError:
        # TODO: 404 settings for the project do not display this to the user (only DEBUG).
        raise Http404(
            _("The AIP package containing the requested METS cannot be found")
        )
    transfer_name = aip["_source"]["name"]
    return helpers.stream_mets_from_storage_service(
        transfer_name=transfer_name, sip_uuid=uuid
    )
Example #23
    def delete(self, request, fileuuid):
        """
        Requires a file UUID.
        Deletes all tags for the given file.
        """
        try:
            es_client = elasticSearchFunctions.get_client()
            elasticSearchFunctions.set_file_tags(es_client, fileuuid, [])
        except elasticSearchFunctions.ElasticsearchError as e:
            response = {"success": False, "message": str(e)}
            if isinstance(e, elasticSearchFunctions.EmptySearchResultError):
                status_code = 404
            else:
                status_code = 400
            return helpers.json_response(response, status_code=status_code)
        return helpers.json_response({"success": True})
Example #24
def create_aic(request):
    """Create an AIC from POSTed list of AIP UUIDs.

    :param request: Django request object.
    :return: Redirect to appropriate view.
    """
    uuids = request.GET.get("uuids")
    if not uuids:
        messages.error(request, "Unable to create AIC: No AIPs selected")
        return redirect("archival_storage:archival_storage_index")

    # Make a list of UUIDs from the comma-separated string in the request.
    aip_uuids = uuids.split(",")
    logger.info("AIC AIP UUIDs: {}".format(aip_uuids))

    # Use the AIP UUIDs to fetch names, which are used to produce files below.
    query = {"query": {"terms": {"uuid": aip_uuids}}}
    es_client = es.get_client()
    results = es_client.search(
        body=query, index=es.AIPS_INDEX, _source="uuid,name", size=es.MAX_QUERY_SIZE
    )

    # Create SIP (AIC) directory in a staging directory.
    shared_dir = settings.SHARED_DIRECTORY
    staging_dir = os.path.join(shared_dir, "tmp")
    temp_uuid = str(uuid.uuid4())
    destination = os.path.join(staging_dir, temp_uuid)
    try:
        os.mkdir(destination)
        os.chmod(destination, DIRECTORY_PERMISSIONS)
    except OSError as e:
        messages.error(request, "Error creating AIC")
        logger.exception("Error creating AIC: {}".format(e))
        return redirect("archival_storage:archival_storage_index")

    # Create an entry for the SIP (AIC) in the database.
    mcp_destination = os.path.join(destination.replace(shared_dir, "%sharedPath%"), "")
    databaseFunctions.createSIP(mcp_destination, UUID=temp_uuid, sip_type="AIC")

    # Create files with filename = AIP UUID, and contents = AIP name.
    for aip in results["hits"]["hits"]:
        filepath = os.path.join(destination, aip["_source"]["uuid"])
        with open(filepath, "w") as f:
            os.chmod(filepath, FILE_PERMISSIONS)
            f.write(str(aip["_source"]["name"]))

    return redirect("ingest:aic_metadata_add", temp_uuid)
Example #25
    def get(self, request, fileuuid):
        """
        Requires a file UUID.
        Returns a JSON-encoded list of the file's tags on success.
        """
        try:
            es_client = elasticSearchFunctions.get_client()
            tags = elasticSearchFunctions.get_file_tags(es_client, fileuuid)
        except elasticSearchFunctions.ElasticsearchError as e:
            response = {"success": False, "message": str(e)}
            if isinstance(e, elasticSearchFunctions.EmptySearchResultError):
                status_code = 404
            else:
                status_code = 400
            return helpers.json_response(response, status_code=status_code)

        return helpers.json_response(tags)
Example #26
def send_thumbnail(request, fileuuid):
    # get AIP location to use to find root of AIP storage
    es_client = es.get_client()
    aipfile = es.get_aipfile_data(es_client, fileuuid, fields="AIPUUID")
    sipuuid = aipfile["_source"]["AIPUUID"]

    thumbnail_path = os.path.join(
        settings.SHARED_DIRECTORY, "www", "thumbnails", sipuuid, fileuuid + ".jpg"
    )

    # send "blank" thumbnail if one exists:
    # Because thumbnails aren't kept in ElasticSearch they can be queried for,
    # during searches, from multiple dashboard servers.
    # Because ElasticSearch don't know if a thumbnail exists or not, this is
    # a way of not causing visual disruption if a thumbnail doesn't exist.
    if not os.path.exists(thumbnail_path):
        thumbnail_path = os.path.join(settings.BASE_PATH, "media/images/1x1-pixel.png")

    return helpers.send_file(request, thumbnail_path)
Example #27
def execute(request):
    """Remove any deleted AIPs from ES index and render main archival storage page.

    :param request: Django request object.
    :return: The main archival storage page rendered.
    """
    if es.AIPS_INDEX in settings.SEARCH_ENABLED:
        es_client = es.get_client()

        total_size = total_size_of_aips(es_client)
        aip_indexed_file_count = aip_file_count(es_client)

        return render(
            request,
            "archival_storage/archival_storage.html",
            {
                "total_size": total_size,
                "aip_indexed_file_count": aip_indexed_file_count,
            },
        )

    return render(request, "archival_storage/archival_storage.html")
    def handle(self, *args, **options):
        """Entry point of the rebuild_transfer_backlog command."""
        # Check that the `transfers` part of the search is enabled
        if 'transfers' not in django_settings.SEARCH_ENABLED:
            print(
                "The Transfers indexes are not enabled. Please, make sure to "
                "set the *_SEARCH_ENABLED environment variables to `true` "
                "to enable the Transfers and AIPs indexes, or to `transfers` "
                "to only enable the Transfers indexes.")
            sys.exit(1)

        if not self.confirm(options['no_prompt']):
            sys.exit(0)

        transfer_backlog_dir = self.prepdir(options['transfer_backlog_dir'])
        if not os.path.exists(transfer_backlog_dir):
            raise CommandError('Directory does not exist: %s'
                               % transfer_backlog_dir)
        self.success('Rebuilding "transfers" index from {}.'.format(
            transfer_backlog_dir))

        # Connect to Elasticsearch.
        elasticSearchFunctions.setup_reading_from_conf(django_settings)
        es_client = elasticSearchFunctions.get_client()
        try:
            es_info = es_client.info()
        except Exception as err:
            raise CommandError("Unable to connect to Elasticsearch: %s" % err)
        else:
            self.success('Connected to Elasticsearch node {} (v{}).'.format(
                es_info['name'], es_info['version']['number']))

        indexes = ['transfers', 'transferfiles']
        self.delete_indexes(es_client, indexes)
        self.create_indexes(es_client, indexes)
        self.populate_indexes(es_client, transfer_backlog_dir)
        self.success('Indexing complete!')
Example #29
def post_store_hook(sip_uuid):
    """
    Hook for doing any work after an AIP is stored successfully.
    """
    elasticSearchFunctions.setup_reading_from_client_conf()
    client = elasticSearchFunctions.get_client()

    # SIP ARRANGEMENT

    # Mark files in this SIP as in an AIP (aip_created)
    file_uuids = models.File.objects.filter(sip=sip_uuid).values_list('uuid', flat=True)
    models.SIPArrange.objects.filter(file_uuid__in=file_uuids).update(aip_created=True)

    # Check if any of the component transfers are completely stored
    # TODO Storage service should index AIPs, knows when to update ES
    transfer_uuids = set(models.SIPArrange.objects.filter(file_uuid__in=file_uuids).values_list('transfer_uuid', flat=True))
    for transfer_uuid in transfer_uuids:
        print('Checking if transfer', transfer_uuid, 'is fully stored...')
        arranged_uuids = set(models.SIPArrange.objects.filter(transfer_uuid=transfer_uuid).filter(aip_created=True).values_list('file_uuid', flat=True))
        backlog_uuids = set(models.File.objects.filter(transfer=transfer_uuid).values_list('uuid', flat=True))
        # If all backlog UUIDs have been arranged
        if arranged_uuids == backlog_uuids:
            print('Transfer', transfer_uuid, 'fully stored, sending delete request to storage service, deleting from transfer backlog')
            # Submit delete req to SS (not actually delete), remove from ES
            storage_service.request_file_deletion(
                uuid=transfer_uuid,
                user_id=0,
                user_email='archivematica system',
                reason_for_deletion='All files in Transfer are now in AIPs.'
            )
            elasticSearchFunctions.remove_transfer_files(client, transfer_uuid)

    # DSPACE HANDLE TO ARCHIVESSPACE
    dspace_handle_to_archivesspace(sip_uuid)

    # POST-STORE CALLBACK
    storage_service.post_store_aip_callback(sip_uuid)
Example #30
def delete(request, uuid):
    """
    Request deletion of a package from a backlog transfer

    :param request: The Django request object
    :param uuid: The UUID of the package requested for deletion.
    :return: Redirects the user back to the backlog page
    """
    try:
        reason_for_deletion = request.POST.get('reason_for_deletion', '')
        response = storage_service.request_file_deletion(
            uuid, request.user.id, request.user.email, reason_for_deletion)

        messages.info(request, response['message'])
        es_client = elasticSearchFunctions.get_client()
        elasticSearchFunctions.mark_backlog_deletion_requested(es_client, uuid)

    except requests.exceptions.ConnectionError:
        error_message = 'Unable to connect to storage server. Please contact your administrator.'
        messages.warning(request, error_message)
    except slumber.exceptions.HttpClientError:
        raise Http404

    return redirect('backlog_index')