Example #1
def on_failure(self, exc, task_id, args, kwargs, einfo):
    job = ArchiveJob.load(kwargs.get('job_pk'))
    if not job:
        raise ArchiverStateError({
            'exception': exc,
            'args': args,
            'kwargs': kwargs,
            'einfo': einfo,
        })
    if job.status == ARCHIVER_FAILURE:
        # already captured
        return
    src, dst, user = job.info()
    errors = []
    if isinstance(exc, ArchiverSizeExceeded):
        dst.archive_status = ARCHIVER_SIZE_EXCEEDED
        errors = exc.result
    elif isinstance(exc, HTTPError):
        dst.archive_status = ARCHIVER_NETWORK_ERROR
        errors = dst.archive_job.target_info()
    else:
        dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
        errors = [einfo]
    dst.save()
    archiver_signals.archive_fail.send(dst, errors=errors)
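
The snippet above is the on_failure hook of a custom Celery task base class; Celery calls it when a task raises. A minimal, self-contained sketch of how such a hook is typically wired up, assuming a Celery app named app (the class and task names below are illustrative, not the project's own):

from celery import Celery, Task

app = Celery('archiver_demo')

class FailureAwareTask(Task):
    """Illustrative base class: Celery invokes on_failure when a bound task fails."""

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # einfo is an ExceptionInfo wrapper carrying the traceback
        print('task {0} failed with {1!r}'.format(task_id, exc))

@app.task(base=FailureAwareTask, bind=True)
def archive_like_task(self, job_pk):
    raise RuntimeError('simulated failure for job {0}'.format(job_pk))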
Example #2
def stat_addon(addon_short_name, job_pk):
    """Collect metadata about the file tree of a given addon

    :param addon_short_name: AddonConfig.short_name of the addon to be examined
    :param job_pk: primary key of archive_job
    :return: AggregateStatResult containing file tree metadata
    """
    # Dataverse requires special handling for draft and
    # published content
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    src_addon = src.get_addon(addon_name)
    try:
        file_tree = src_addon._get_file_tree(user=user, version=version)
    except HTTPError as e:
        dst.archive_job.update_target(
            addon_short_name,
            ARCHIVER_NETWORK_ERROR,
            errors=[e.data['error']],
        )
        raise
    result = AggregateStatResult(
        src_addon._id,
        addon_short_name,
        targets=[utils.aggregate_file_tree_metadata(addon_short_name, file_tree, user)],
    )
    return result
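
The Dataverse branch packs both the addon name and the revision into addon_short_name (a name ending in '-draft' means the draft revision). A small standalone sketch of just that mapping, using only the logic visible above:

def resolve_dataverse_target(addon_short_name):
    """Mirror the branching above: return (addon_name, version) for a short name."""
    if 'dataverse' in addon_short_name:
        is_draft = addon_short_name.split('-')[-1] == 'draft'
        return 'dataverse', 'latest' if is_draft else 'latest-published'
    return addon_short_name, None

assert resolve_dataverse_target('dataverse-draft') == ('dataverse', 'latest')
assert resolve_dataverse_target('dataverse') == ('dataverse', 'latest-published')
assert resolve_dataverse_target('github') == ('github', None)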
Example #4
def archive(job_pk):
    """Starts a celery.chord that runs stat_addon for each
    complete addon attached to the Node, then runs
    #archive_node with the result

    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger = get_task_logger(__name__)
    logger.info("Received archive task for Node: {0} into Node: {1}".format(src._id, dst._id))
    return celery.chain(
        [
            celery.group(
                stat_addon.si(
                    addon_short_name=target.name,
                    job_pk=job_pk,
                )
                for target in job.target_addons
            ),
            archive_node.s(
                job_pk=job_pk
            )
        ]
    )
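
The returned workflow is a chain whose head is a group of stat_addon signatures and whose tail is the archive_node callback; Celery upgrades that combination to a chord, so archive_node receives the list of all stat_addon results. A minimal sketch of the same pattern with toy tasks (names here are illustrative):

import celery

app = celery.Celery('chord_demo')  # broker/result backend configuration omitted

@app.task
def stat_one(name):
    return {'name': name, 'num_files': len(name)}

@app.task
def collect(results):
    # receives the list of results from every task in the group
    return sorted(r['name'] for r in results)

workflow = celery.chain(
    celery.group(stat_one.si(name) for name in ['box', 'github', 'osfstorage']),
    collect.s(),
)
# workflow.delay() would run the group, then call collect() once with all the results.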
Example #5
def on_failure(self, exc, task_id, args, kwargs, einfo):
    job = ArchiveJob.load(kwargs.get('job_pk'))
    if not job:
        raise ArchiverStateError({
            'exception': exc,
            'args': args,
            'kwargs': kwargs,
            'einfo': einfo,
        })
    if job.status == ARCHIVER_FAILURE:
        # already captured
        return
    src, dst, user = job.info()
    errors = []
    if isinstance(exc, ArchiverSizeExceeded):
        dst.archive_status = ARCHIVER_SIZE_EXCEEDED
        errors = exc.result
    elif isinstance(exc, HTTPError):
        dst.archive_status = ARCHIVER_NETWORK_ERROR
        errors = [
            each for each in dst.archive_job.target_info()
            if each is not None
        ]
    elif isinstance(exc, ArchivedFileNotFound):
        dst.archive_status = ARCHIVER_FILE_NOT_FOUND
        errors = {
            'missing_files': exc.missing_files,
            'draft': exc.draft_registration
        }
    else:
        dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
        errors = [einfo] if einfo else []
    dst.save()
    archiver_signals.archive_fail.send(dst, errors=errors)
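
This later revision maps each exception class to an archive status plus an errors payload. The branching is easier to follow (and to test) as a small pure helper; the constants and exception classes below are stand-ins for the ones imported by the real module:

ARCHIVER_SIZE_EXCEEDED = 'SIZE_EXCEEDED'
ARCHIVER_NETWORK_ERROR = 'NETWORK_ERROR'
ARCHIVER_FILE_NOT_FOUND = 'FILE_NOT_FOUND'
ARCHIVER_UNCAUGHT_ERROR = 'UNCAUGHT_ERROR'

class ArchiverSizeExceeded(Exception):
    def __init__(self, result):
        self.result = result

class HTTPError(Exception):
    pass

class ArchivedFileNotFound(Exception):
    def __init__(self, registration, missing_files):
        self.draft_registration = registration
        self.missing_files = missing_files

def classify_failure(exc, target_info=None, einfo=None):
    """Return (archive_status, errors), mirroring the branches in on_failure above."""
    if isinstance(exc, ArchiverSizeExceeded):
        return ARCHIVER_SIZE_EXCEEDED, exc.result
    if isinstance(exc, HTTPError):
        return ARCHIVER_NETWORK_ERROR, [t for t in (target_info or []) if t is not None]
    if isinstance(exc, ArchivedFileNotFound):
        return ARCHIVER_FILE_NOT_FOUND, {
            'missing_files': exc.missing_files,
            'draft': exc.draft_registration,
        }
    return ARCHIVER_UNCAUGHT_ERROR, [einfo] if einfo else []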
Example #6
def archive_node(results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving node: {0}".format(src._id))
    stat_result = AggregateStatResult(
        src._id,
        src.title,
        targets=results,
    )
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not results:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result.num_files:
                job.update_target(result.target_name, ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(
                    addon_short_name=result.target_name,
                    job_pk=job_pk,
                    stat_result=result,
                )
        project_signals.archive_callback.send(dst)
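
The gate above fails the whole registration when the aggregated disk usage exceeds settings.MAX_ARCHIVE_SIZE, unless the initiator carries the NO_ARCHIVE_LIMIT system tag. A sketch of that predicate in isolation (both constants are stand-ins, not the project's actual values):

NO_ARCHIVE_LIMIT = 'no_archive_limit'   # stand-in for the real system tag
MAX_ARCHIVE_SIZE = 5 * 1024 ** 3        # stand-in cap: 5 GiB

def exceeds_archive_limit(disk_usage, initiator_system_tags):
    """True when the archive should be rejected for size, mirroring the check above."""
    if NO_ARCHIVE_LIMIT in initiator_system_tags:
        return False
    return disk_usage > MAX_ARCHIVE_SIZE

assert exceeds_archive_limit(6 * 1024 ** 3, []) is True
assert exceeds_archive_limit(6 * 1024 ** 3, [NO_ARCHIVE_LIMIT]) is False
assert exceeds_archive_limit(1024, []) is False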
Example #7
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    for schema in dst.registered_schema:
        if schema.has_files:
            utils.migrate_file_metadata(dst, schema)
    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True))
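
The docstring relies on utils.get_file_map yielding (sha256, file_metadata) pairs so that files can be re-matched by content hash after archival. As a reminder of the generic technique (plain standard-library code, not the project's helper):

import hashlib

def sha256_of(path, chunk_size=1024 * 1024):
    """Stream a file and return its hex SHA-256, the key used to re-match files."""
    digest = hashlib.sha256()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()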
Example #9
def archive_addon(addon_short_name, job_pk, stat_result):
    """Archive the contents of an addon by making a copy request to the
    WaterButler API

    :param addon_short_name: AddonConfig.short_name of the addon to be archived
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    # Dataverse requires special handling for draft
    # and published content
    addon_name = addon_short_name
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving addon: {0} on node: {1}".format(addon_short_name, src._id))
    src_provider = src.get_addon(addon_name)
    folder_name = src_provider.archive_folder_name
    cookie = user.get_or_create_cookie()
    copy_url = settings.WATERBUTLER_URL + '/ops/copy'
    if addon_name == 'dataverse':
        # The dataverse API will not differentiate between published and draft files
        # unless explicitly asked. We need to create separate folders for published and
        # draft in the resulting archive.
        #
        # Additionally, running the archive without this distinction creates a race
        # condition that causes archive jobs to fail non-deterministically.
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (published)'.format(folder_name), cookie, revision='latest-published')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (draft)'.format(folder_name), cookie, revision='latest')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
    else:
        data = make_waterbutler_payload(src, dst, addon_name, folder_name, cookie)
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
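
For Dataverse the task fans out two copy requests, one per revision, while every other addon sends exactly one. A compact sketch of that dispatch; build_payload stands in for make_waterbutler_payload, whose real signature and WaterButler payload fields are not reproduced here:

def build_payload(folder_name, revision=None):
    # Stand-in for make_waterbutler_payload above; the real source/destination
    # fields and cookie handling are intentionally left out.
    payload = {'folder': folder_name}
    if revision is not None:
        payload['revision'] = revision
    return payload

def copy_requests_for(addon_name, folder_name):
    """Yield one payload per copy request, mirroring the fan-out above."""
    if addon_name == 'dataverse':
        yield build_payload('{0} (published)'.format(folder_name), revision='latest-published')
        yield build_payload('{0} (draft)'.format(folder_name), revision='latest')
    else:
        yield build_payload(folder_name)

assert len(list(copy_requests_for('dataverse', 'Dataverse'))) == 2
assert len(list(copy_requests_for('github', 'GitHub'))) == 1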
Example #14
def archive_node(stat_results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param stat_results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving node: {0}".format(src._id))

    if not isinstance(stat_results, list):
        stat_results = [stat_results]
    stat_result = AggregateStatResult(dst._id, dst.title, targets=stat_results)
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (
            stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not stat_result.targets:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result.num_files:
                job.update_target(result.target_name, ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(
                    addon_short_name=result.target_name,
                    job_pk=job_pk,
                    stat_result=result,
                )
        project_signals.archive_callback.send(dst)
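
Compared with Example #6, this revision also tolerates receiving a single stat result rather than a list. A stand-in for the aggregation step, showing only the shape archive_node relies on (target_name, num_files, disk_usage) rather than the real AggregateStatResult class:

class StatResult(object):
    """Illustrative stand-in with the attributes archive_node reads."""
    def __init__(self, target_name, num_files, disk_usage):
        self.target_name = target_name
        self.num_files = num_files
        self.disk_usage = disk_usage

def aggregate(results):
    """Normalize to a list and sum disk usage, mirroring the code above."""
    if not isinstance(results, list):
        results = [results]
    return results, sum(r.disk_usage for r in results)

targets, total = aggregate(StatResult('osfstorage', 3, 2048))
assert total == 2048 and targets[0].target_name == 'osfstorage'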
Example #15
def main(dry):
    if dry:
        logger.info('[DRY MODE]')
    init_app(routes=False)
    for _id in FAILED_ARCHIVE_JOBS:
        archive_job = ArchiveJob.load(_id)
        assert archive_job.status == ARCHIVER_INITIATED
        root_node = archive_job.dst_node.root
        with TokuTransaction():
            clean(reg=root_node, dry=dry)
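
The dry flag lets the cleanup be previewed before anything is written. The command-line entry point is not shown above; a plausible argparse wiring for the main defined here, offered only as an illustration, would be:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Clean up failed archive jobs')
    parser.add_argument('--dry', action='store_true',
                        help='log what would change without writing anything')
    args = parser.parse_args()
    main(dry=args.dry)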
Example #17
def make_copy_request(job_pk, url, data):
    """Make the copy request to the WaterBulter API and handle
    successful and failed responses

    :param job_pk: primary key of ArchiveJob
    :param url: URL to send request to
    :param data: <dict> of settings to send in POST to the WaterButler API
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    provider = data['source']['provider']
    logger.info("Sending copy request for addon: {0} on node: {1}".format(provider, dst._id))
    requests.post(url, data=json.dumps(data))
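
As written, the body posts the payload and discards the response, despite what the docstring promises. If explicit response handling were wanted, a minimal extension using requests might look like the following (the error-propagation behaviour is an assumption, not the project's):

import json
import requests

def post_copy_request(url, data, timeout=30):
    """Send the copy payload and surface HTTP errors instead of ignoring them."""
    response = requests.post(
        url,
        data=json.dumps(data),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    response.raise_for_status()  # raises requests.HTTPError on 4xx/5xx
    return response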
Example #20
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    prereg_schema = MetaSchema.find_one(
        Q('name', 'eq', 'Prereg Challenge') &
        Q('schema_version', 'eq', 2)
    )
    missing_files = []
    if prereg_schema in dst.registered_schema:
        prereg_metadata = dst.registered_meta[prereg_schema._id]
        updated_metadata = {}
        for key, question in prereg_metadata.items():
            if isinstance(question['value'], dict):
                for subkey, subvalue in question['value'].items():
                    registration_file = None
                    if subvalue.get('extra', {}).get('sha256'):
                        registration_file, node_id = find_registration_file(subvalue, dst)
                        if not registration_file:
                            missing_files.append({
                                'file_name': subvalue['extra']['selectedFileName'],
                                'question_title': find_question(prereg_schema.schema, key)['title']
                            })
                            continue
                        subvalue['extra'].update({
                            'viewUrl': VIEW_FILE_URL_TEMPLATE.format(node_id=node_id, path=registration_file['path'].lstrip('/'))
                        })
                    question['value'][subkey] = subvalue
            else:
                if question.get('extra', {}).get('sha256'):
                    registration_file, node_id = find_registration_file(question, dst)
                    if not registration_file:
                        missing_files.append({
                            'file_name': question['extra']['selectedFileName'],
                            'question_title': find_question(prereg_schema.schema, key)['title']
                        })
                        continue
                    question['extra'].update({
                        'viewUrl': VIEW_FILE_URL_TEMPLATE.format(node_id=node_id, path=registration_file['path'].lstrip('/'))
                    })
            updated_metadata[key] = question

        if missing_files:
            raise ArchivedFileNotFound(
                registration=dst,
                missing_files=missing_files
            )

        prereg_metadata.update(updated_metadata)
        dst.registered_meta[prereg_schema._id] = prereg_metadata
        dst.save()

    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True))
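
The traversal rewrites 'viewUrl' for every answer whose 'extra' carries a sha256. A small standalone walker over the same nested layout, collecting the answers that would need rewriting (the metadata shape is taken from the snippet; find_registration_file and the URL template are left out):

def answers_with_files(prereg_metadata):
    """Yield (key, subkey, extra) for every answer that references a file by sha256."""
    for key, question in prereg_metadata.items():
        value = question.get('value')
        if isinstance(value, dict):
            for subkey, subvalue in value.items():
                if subvalue.get('extra', {}).get('sha256'):
                    yield key, subkey, subvalue['extra']
        elif question.get('extra', {}).get('sha256'):
            yield key, None, question['extra']

sample = {
    'q1': {'value': {'uploader': {'extra': {'sha256': 'abc', 'selectedFileName': 'data.csv'}}}},
    'q2': {'value': 'free text'},
}
assert list(answers_with_files(sample)) == [
    ('q1', 'uploader', {'sha256': 'abc', 'selectedFileName': 'data.csv'}),
]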