Code Example #1
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node
    :param str job_pk: primary key of the ArchiveJob

    .. note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    for schema in dst.registered_schema.all():
        if schema.has_files:
            utils.migrate_file_metadata(dst, schema)
    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True))
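The docstring note above leans on two properties of utils.get_file_map: memoization and a lazy, non-recursive DFS over child Nodes. A minimal sketch of how such a helper could be structured, assuming a caller-supplied fetch_metadata callable and a `nodes` relation for child Nodes; the real osf.io implementation may differ.

_file_map_cache = {}

def get_file_map(node, fetch_metadata):
    """Lazily yield (<sha256>, <file_metadata>) pairs for `node` and its
    children, caching per-node results so repeated calls stay cheap."""
    stack = [node]  # non-recursive DFS over the node and its child Nodes
    while stack:
        current = stack.pop()
        if current._id not in _file_map_cache:
            # fetch_metadata(current) stands in for whatever storage lookup
            # the real implementation performs for a single node.
            _file_map_cache[current._id] = list(fetch_metadata(current))
        for sha256, metadata in _file_map_cache[current._id]:
            yield sha256, metadata
        stack.extend(current.nodes)  # descend into child Nodes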
Code Example #2
 def on_failure(self, exc, task_id, args, kwargs, einfo):
     """Celery failure handler for archiver tasks: record why archiving the
     destination node failed and notify listeners via the archive_fail signal.
     """
     job = ArchiveJob.load(kwargs.get('job_pk'))
     if not job:
         raise ArchiverStateError({
             'exception': exc,
             'args': args,
             'kwargs': kwargs,
             'einfo': einfo,
         })
     if job.status == ARCHIVER_FAILURE:
         # already captured
         return
     src, dst, user = job.info()
     errors = []
     if isinstance(exc, ArchiverSizeExceeded):
         dst.archive_status = ARCHIVER_SIZE_EXCEEDED
         errors = exc.result
     elif isinstance(exc, HTTPError):
         dst.archive_status = ARCHIVER_NETWORK_ERROR
         errors = [
             each for each in
             dst.archive_job.target_info()
             if each is not None
         ]
     elif isinstance(exc, ArchivedFileNotFound):
         dst.archive_status = ARCHIVER_FILE_NOT_FOUND
         errors = {
             'missing_files': exc.missing_files,
             'draft': exc.draft_registration
         }
     else:
         dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
         errors = [einfo] if einfo else []
     dst.save()
     archiver_signals.archive_fail.send(dst, errors=errors)
Code Example #3
def archive_node(stat_results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param stat_results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving node: {0}'.format(src._id))

    if not isinstance(stat_results, list):
        stat_results = [stat_results]
    stat_result = AggregateStatResult(dst._id, dst.title, targets=stat_results)
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (
            stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not stat_result.targets:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result['num_files']:
                job.update_target(result['target_name'], ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(addon_short_name=result['target_name'],
                                    job_pk=job_pk)
        project_signals.archive_callback.send(dst)
Code Example #4
def archive(job_pk):
    """Starts a celery.chord that runs stat_addon for each
    complete addon attached to the Node, then runs
    #archive_node with the result

    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger = get_task_logger(__name__)
    logger.info('Received archive task for Node: {0} into Node: {1}'.format(src._id, dst._id))
    return celery.chain(
        [
            celery.group(
                stat_addon.si(
                    addon_short_name=target.name,
                    job_pk=job_pk,
                )
                for target in job.target_addons.all()
            ),
            archive_node.s(
                job_pk=job_pk
            )
        ]
    )
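In Celery, a group followed by another signature inside a chain is promoted to a chord, which is how archive_node ends up receiving the list of stat_addon results. A minimal usage sketch under that assumption; the call site and addon names here are hypothetical, not taken from osf.io.

from celery import chain, group

# Hypothetical call site: job_pk is the primary key of an existing ArchiveJob.
# The group of stat_addon subtasks feeds its list of results into archive_node,
# mirroring the chain returned by archive() above.
workflow = chain(
    group(stat_addon.si(addon_short_name=name, job_pk=job_pk)
          for name in ('osfstorage', 'github')),  # example addon names
    archive_node.s(job_pk=job_pk),
)
workflow.apply_async()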
Code Example #5
def archive_addon(addon_short_name, job_pk, stat_result):
    """Archive the contents of an addon by making a copy request to the
    WaterButler API

    :param addon_short_name: AddonConfig.short_name of the addon to be archived
    :param job_pk: primary key of ArchiveJob
    :param stat_result: AggregateStatResult for this addon, produced by #stat_addon
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving addon: {0} on node: {1}'.format(addon_short_name, src._id))

    cookie = user.get_or_create_cookie()
    params = {'cookie': cookie}
    rename_suffix = ''
    # The dataverse API will not differentiate between published and draft files
    # unless explicitly asked. We need to create separate folders for published and
    # draft in the resulting archive.
    #
    # Additionally, trying to run the archive without this distinction creates a race
    # condition that non-deterministically causes archive jobs to fail.
    if 'dataverse' in addon_short_name:
        params['revision'] = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
        rename_suffix = ' (draft)' if addon_short_name.split('-')[-1] == 'draft' else ' (published)'
        addon_short_name = 'dataverse'
    src_provider = src.get_addon(addon_short_name)
    folder_name = src_provider.archive_folder_name
    rename = '{}{}'.format(folder_name, rename_suffix)
    url = waterbutler_api_url_for(src._id, addon_short_name, _internal=True, **params)
    data = make_waterbutler_payload(dst._id, rename)
    make_copy_request.delay(job_pk=job_pk, url=url, data=data)
Code Example #6
def find_failed_registrations():
    expired_if_before = datetime.utcnow() - ARCHIVE_TIMEOUT_TIMEDELTA
    jobs = ArchiveJob.find(
        Q('sent', 'eq', False) &
        Q('datetime_initiated', 'lt', expired_if_before) &
        Q('status', 'eq', ARCHIVER_INITIATED)
    )
    return sorted({
        node.root for node in [job.dst_node for job in jobs]
        if node and node.root
        and not node.root.is_deleted
    }, key=lambda n: n.registered_date)
Code Example #7
def find_failed_registrations():
    expired_if_before = datetime.utcnow() - ARCHIVE_TIMEOUT_TIMEDELTA
    jobs = ArchiveJob.find(
        Q('sent', 'eq', False)
        & Q('datetime_initiated', 'lt', expired_if_before)
        & Q('status', 'eq', ARCHIVER_INITIATED))
    return sorted(
        {
            node.root
            for node in [job.dst_node for job in jobs]
            if node and node.root and not node.root.is_deleted
        },
        key=lambda n: n.registered_date)
Code Example #8
def make_copy_request(job_pk, url, data):
    """Make the copy request to the WaterBulter API and handle
    successful and failed responses

    :param job_pk: primary key of ArchiveJob
    :param url: URL to send request to
    :param data: <dict> of settings to send in the POST to the WaterButler API
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Sending copy request for addon: {0} on node: {1}'.format(data['provider'], dst._id))
    res = requests.post(url, data=json.dumps(data))
    if res.status_code not in (http.OK, http.CREATED, http.ACCEPTED):
        raise HTTPError(res.status_code)
Code Example #9
File: tasks.py  Project: BRosenblatt/osf.io
def archive_addon(addon_short_name, job_pk, stat_result):
    """Archive the contents of an addon by making a copy request to the
    WaterButler API

    :param addon_short_name: AddonConfig.short_name of the addon to be archived
    :param job_pk: primary key of ArchiveJob
    :param stat_result: AggregateStatResult for this addon, produced by #stat_addon
    :return: None
    """
    # Dataverse requires special handling for draft
    # and published content
    addon_name = addon_short_name
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        revision = 'latest' if addon_short_name.split(
            '-')[-1] == 'draft' else 'latest-published'
        folder_name_suffix = 'draft' if addon_short_name.split(
            '-')[-1] == 'draft' else 'published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving addon: {0} on node: {1}'.format(
        addon_short_name, src._id))
    src_provider = src.get_addon(addon_name)
    folder_name = src_provider.archive_folder_name
    cookie = user.get_or_create_cookie()
    copy_url = settings.WATERBUTLER_INTERNAL_URL + '/ops/copy'
    if addon_name == 'dataverse':
        # The dataverse API will not differentiate between published and draft files
        # unless explicitly asked. We need to create separate folders for published and
        # draft in the resulting archive.
        #
        # Additionally, trying to run the archive without this distinction creates a race
        # condition that non-deterministically causes archive jobs to fail.
        data = make_waterbutler_payload(src,
                                        dst,
                                        addon_name,
                                        '{0} ({1})'.format(
                                            folder_name, folder_name_suffix),
                                        cookie,
                                        revision=revision)
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
    else:
        data = make_waterbutler_payload(src, dst, addon_name, folder_name,
                                        cookie)
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
Code Example #10
def stat_addon(addon_short_name, job_pk):
    """Collect metadata about the file tree of a given addon

    :param addon_short_name: AddonConfig.short_name of the addon to be examined
    :param job_pk: primary key of archive_job
    :return: AggregateStatResult containing file tree metadata
    """
    # Dataverse requires special handling for draft and
    # published content
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split(
            '-')[-1] == 'draft' else 'latest-published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    src_addon = src.get_addon(addon_name)
    if hasattr(src_addon, 'configured') and not src_addon.configured:
        # Addon enabled but not configured - no file trees, nothing to archive.
        return AggregateStatResult(src_addon._id, addon_short_name)
    try:
        file_tree = src_addon._get_file_tree(user=user, version=version)
    except HTTPError as e:
        dst.archive_job.update_target(
            addon_short_name,
            ARCHIVER_NETWORK_ERROR,
            errors=[e.data['error']],
        )
        raise
    result = AggregateStatResult(
        src_addon._id,
        addon_short_name,
        targets=[
            utils.aggregate_file_tree_metadata(addon_short_name, file_tree,
                                               user)
        ],
    )
    return result
Code Example #11
File: tasks.py  Project: adlius/osf.io
def archive_node(stat_results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param stat_results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info('Archiving node: {0}'.format(src._id))

    if not isinstance(stat_results, list):
        stat_results = [stat_results]
    stat_result = AggregateStatResult(
        dst._id,
        dst.title,
        targets=stat_results
    )
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not stat_result.targets:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result.num_files:
                job.update_target(result.target_name, ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(
                    addon_short_name=result.target_name,
                    job_pk=job_pk,
                    stat_result=result,
                )
        project_signals.archive_callback.send(dst)
Code Example #12
File: tasks.py  Project: adlius/osf.io
def stat_addon(addon_short_name, job_pk):
    """Collect metadata about the file tree of a given addon

    :param addon_short_name: AddonConfig.short_name of the addon to be examined
    :param job_pk: primary key of archive_job
    :return: AggregateStatResult containing file tree metadata
    """
    # Dataverse requires special handling for draft and
    # published content
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    src_addon = src.get_addon(addon_name)
    if hasattr(src_addon, 'configured') and not src_addon.configured:
        # Addon enabled but not configured - no file trees, nothing to archive.
        return AggregateStatResult(src_addon._id, addon_short_name)
    try:
        file_tree = src_addon._get_file_tree(user=user, version=version)
    except HTTPError as e:
        dst.archive_job.update_target(
            addon_short_name,
            ARCHIVER_NETWORK_ERROR,
            errors=[e.data['error']],
        )
        raise
    result = AggregateStatResult(
        src_addon._id,
        addon_short_name,
        targets=[utils.aggregate_file_tree_metadata(addon_short_name, file_tree, user)],
    )
    return result
Code Example #13
File: factories.py  Project: envobe/osf.io
    def _create(cls,
                target_class,
                project=None,
                is_public=False,
                schema=None,
                data=None,
                archive=False,
                embargo=None,
                registration_approval=None,
                retraction=None,
                *args,
                **kwargs):
        save_kwargs(**kwargs)
        user = None
        if project:
            user = project.creator
        user = kwargs.get('user') or kwargs.get(
            'creator') or user or UserFactory()
        kwargs['creator'] = user
        # Original project to be registered
        project = project or target_class(*args, **kwargs)
        if user._id not in project.permissions:
            project.add_contributor(
                contributor=user,
                permissions=permissions.CREATOR_PERMISSIONS,
                log=False,
                save=False)
        project.save()

        # Default registration parameters
        schema = schema or get_default_metaschema()
        data = data or {'some': 'data'}
        auth = Auth(user=user)
        register = lambda: project.register_node(
            schema=schema, auth=auth, data=data)

        def add_approval_step(reg):
            if embargo:
                reg.embargo = embargo
            elif registration_approval:
                reg.registration_approval = registration_approval
            elif retraction:
                reg.retraction = retraction
            else:
                reg.require_approval(reg.creator)
            reg.save()
            reg.sanction.add_authorizer(reg.creator, reg)
            reg.sanction.save()

        with patch('framework.celery_tasks.handlers.enqueue_task'):
            reg = register()
            add_approval_step(reg)
        if not archive:
            with patch.object(reg.archive_job, 'archive_tree_finished',
                              Mock(return_value=True)):
                reg.archive_job.status = ARCHIVER_SUCCESS
                reg.archive_job.save()
                reg.sanction.state = Sanction.APPROVED
                reg.sanction.save()
        ArchiveJob(
            src_node=project,
            dst_node=reg,
            initiator=user,
        )
        if is_public:
            reg.is_public = True
        reg.save()
        return reg
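Assuming this _create override belongs to a factory_boy-style registration factory (for example a RegistrationFactory alongside a ProjectFactory in osf.io's factories.py), test code might use it roughly as sketched below; the factory names are assumptions, not shown in the excerpt above.

# Hypothetical test usage of the factory defined by the _create above.
project = ProjectFactory()
reg = RegistrationFactory(
    project=project,   # original node to be registered
    is_public=True,    # expose the resulting registration publicly
    archive=False,     # skip real archiving; the job is marked ARCHIVER_SUCCESS
)
assert reg.archive_job.status == ARCHIVER_SUCCESS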