Esempio n. 1
0
def run_next_capture():
    """
    Reserve the next CaptureJob, run its capture, then schedule this task again.

    When no job is waiting, nothing further is scheduled, so the chain of calls
    ends once there are no jobs left.
    """
    job = CaptureJob.get_next_job(reserve=True)
    if job:
        # the capture task is applied with the job's link id as its only argument
        proxy_capture.apply([job.link_id])
        # schedule another pass to pick up whatever job comes next
        run_task(run_next_capture.s())
Esempio n. 2
0
    def patch(self, request, guid, format=None):
        """
        Update link.

        Applies a partial update to the link identified by `guid`, then optionally
        moves it to a new folder, replaces its archived file with an uploaded one,
        and dispatches an Internet Archive upload/delete task when the privacy
        flag flips on an archive-eligible link.

        Returns a Response holding the serialized link plus the user's
        'links_remaining' and 'links_remaining_period' values.
        Raises ValidationError with the serializer errors if the data is invalid.
        """
        link = self.get_object_for_user_by_pk(request.user, guid)

        # remember the privacy state before the serializer overwrites it
        was_private = link.is_private
        data = request.data

        serializer = self.serializer_class(link, data=data, partial=True, context={'request': self.request})
        if not serializer.is_valid():
            raise ValidationError(serializer.errors)
        serializer.save()

        # move to new folder
        folder = AuthenticatedLinkListView.get_folder_from_request(request)
        if folder:
            link.move_to_folder_for_user(folder, request.user)

        # handle file patch
        uploaded_file = request.data.get('file')
        if uploaded_file:

            # delete related cdxlines and captures, delete warc (rename)
            link.delete_related()
            link.safe_delete_warc()

            # write new warc and capture
            link.write_uploaded_file(uploaded_file, cache_break=True)

            # delete the link from Webrecorder and
            # clear the user's Webrecorder session, if any,
            # so that the new warc is used for this visitor's
            # next playback of this link.
            if settings.ENABLE_WR_PLAYBACK:
                link.delete_from_wr(request)
                clear_wr_session(request)

        # update internet archive if privacy changes
        # NOTE(review): bool() of a form-encoded "false" string is True -- confirm that
        # request.data carries a real boolean here.
        if 'is_private' in data and was_private != bool(data.get("is_private")) and link.is_archive_eligible():
            if was_private:
                # link was private but has been marked public
                run_task(upload_to_internet_archive.s(link_guid=link.guid))
            else:
                # link was public but has been marked private
                run_task(delete_from_internet_archive.s(link_guid=link.guid))

        # include remaining links in response
        links_remaining = request.user.get_links_remaining()

        # Build the payload once and add the extra keys to that same object.
        # Django REST framework's Serializer.data hands back a new ReturnDict on
        # every access, so keys assigned to `serializer.data[...]` are lost when
        # `serializer.data` is read again for the Response.
        response_data = serializer.data
        response_data['links_remaining'] = 'Infinity' if links_remaining[0] == float('inf') else links_remaining[0]
        response_data['links_remaining_period'] = links_remaining[1]

        # clear out any caches that might be based on old link data
        link.clear_cache()

        return Response(response_data)
Esempio n. 3
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or an uploaded file.

        The parent obj_create validates and saves the link (raising if invalid);
        an Asset is then built for it. An uploaded file is stored under a
        normalized name and recorded on the asset. Otherwise the asset's captures
        are marked pending and a retrieval task is dispatched through run_task.

        Raises ImmediateHttpResponse while settings.READ_ONLY_MODE is on.
        Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            paused_message = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            error_payload = {
                'archives': {'__all__': paused_message},
                'reason': paused_message,
            }
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, error_payload))

        # Runs validation (exception thrown if invalid), sets properties and saves the object
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        asset = Asset(link=bundle.obj)

        upload = bundle.data.get('file')
        if not upload:
            asset.image_capture = Asset.CAPTURE_STATUS_PENDING
            if asset.link.media_type == 'pdf':
                # looks like a PDF: only run the PDF retrieval tool
                asset.pdf_capture = Asset.CAPTURE_STATUS_PENDING
                task = get_pdf
            else:
                # not a PDF: retrieve what we can through the proxy capture
                asset.warc_capture = Asset.CAPTURE_STATUS_PENDING
                task = proxy_capture

            asset.save()
            user_agent = bundle.request.META.get('HTTP_USER_AGENT', '')
            signature = task.s(asset.link.guid, asset.link.submitted_url, asset.base_storage_path, user_agent)
            run_task(signature)
            return bundle

        # normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
        mime_type = get_mime_type(upload.name)
        extension = mime_type_lookup[mime_type]['new_extension']
        normalized_name = 'upload.%s' % extension
        destination = os.path.join(asset.base_storage_path, normalized_name)

        # rewind before handing the file to storage
        upload.file.seek(0)
        stored_name = default_storage.store_file(upload, destination)

        if mime_type == 'application/pdf':
            asset.pdf_capture = stored_name
        else:
            asset.image_capture = stored_name
        asset.user_upload = True
        asset.user_upload_file_name = upload.name
        asset.save()

        return bundle
Esempio n. 4
0
def upload_all_to_internet_archive():
    """
    Dispatch an Internet Archive upload for links created 48 to 24 hours ago.

    Only links whose upload status is 'not_started' or 'failed', and which
    report can_upload_to_internet_archive(), get a task.
    """
    # timezone-aware bounds of the creation window
    window_start = timezone.now() - timedelta(days=2)
    window_end = timezone.now() - timedelta(days=1)

    needs_upload = Q(internet_archive_upload_status='not_started') | Q(internet_archive_upload_status='failed')
    candidates = Link.objects.filter(needs_upload, creation_timestamp__range=(window_start, window_end))

    for candidate in candidates:
        if not candidate.can_upload_to_internet_archive():
            continue
        run_task(upload_to_internet_archive.s(link_guid=candidate.guid))
Esempio n. 5
0
    def patch(self, request, guid, format=None):
        """
        Update link.

        Applies a partial update to the link identified by `guid`, then optionally
        moves it to a new folder, replaces its archived file with an uploaded one
        (recording the new warc size), and dispatches an Internet Archive
        upload/delete task when the privacy flag flips on an archive-eligible link.

        Returns a Response holding the serialized link plus the user's
        'links_remaining' and 'links_remaining_period' values.
        Raises ValidationError with the serializer errors if the data is invalid.
        """
        link = self.get_object_for_user_by_pk(request.user, guid)

        # remember the privacy state before the serializer overwrites it
        was_private = link.is_private
        data = request.data

        serializer = self.serializer_class(link, data=data, partial=True, context={'request': self.request})
        if not serializer.is_valid():
            raise ValidationError(serializer.errors)
        serializer.save()

        # move to new folder
        folder = AuthenticatedLinkListView.get_folder_from_request(request)
        if folder:
            link.move_to_folder_for_user(folder, request.user)

        # handle file patch
        uploaded_file = request.data.get('file')
        if uploaded_file:

            # delete related cdxlines and captures, delete warc (rename)
            link.delete_related()
            link.safe_delete_warc()

            # write new warc and capture
            link.write_uploaded_file(uploaded_file, cache_break=True)
            link.warc_size = default_storage.size(link.warc_storage_file())
            link.save()

            # delete the link from Webrecorder and
            # clear the user's Webrecorder session, if any,
            # so that the new warc is used for this visitor's
            # next playback of this link.
            link.delete_from_wr(request)
            clear_wr_session(request)

        # update internet archive if privacy changes
        # NOTE(review): bool() of a form-encoded "false" string is True -- confirm that
        # request.data carries a real boolean here.
        if 'is_private' in data and was_private != bool(data.get("is_private")) and link.is_archive_eligible():
            if was_private:
                # link was private but has been marked public
                run_task(upload_to_internet_archive.s(link_guid=link.guid))
            else:
                # link was public but has been marked private
                run_task(delete_from_internet_archive.s(link_guid=link.guid))

        # include remaining links in response
        links_remaining = request.user.get_links_remaining()

        # Build the payload once and add the extra keys to that same object.
        # Django REST framework's Serializer.data hands back a new ReturnDict on
        # every access, so keys assigned to `serializer.data[...]` are lost when
        # `serializer.data` is read again for the Response.
        response_data = serializer.data
        response_data['links_remaining'] = 'Infinity' if links_remaining[0] == float('inf') else links_remaining[0]
        response_data['links_remaining_period'] = links_remaining[1]

        return Response(response_data)
Esempio n. 6
0
def sync_mirror():
    """
    Dispatch an update for every link in the upstream manifest that is missing locally.

    The manifest is streamed from the server configured in settings.UPSTREAM_SERVER
    and each non-blank line is treated as a link GUID. For every GUID with no
    matching Link row, update_perma is handed to run_task.
    """
    metadata_server = settings.UPSTREAM_SERVER['address']
    manifest_url = metadata_server + reverse("mirroring:manifest")
    metadata = requests.get(
        manifest_url,
        headers=settings.UPSTREAM_SERVER.get('headers', {}),
        stream=True)
    for line in metadata.iter_lines():
        guid = line.strip()
        if not guid:
            # blank lines carry no GUID; don't schedule an update for them
            continue
        # keyword lookup: the previous `guid==guid` compared the variable with
        # itself and passed the resulting boolean to filter() positionally
        if not Link.objects.filter(guid=guid).exists():
            run_task(update_perma, link_guid=guid)
Esempio n. 7
0
def upload_all_to_internet_archive():
    """
    Dispatch an Internet Archive upload for links created 48 to 24 hours ago.

    Considers only links with uploaded_to_internet_archive=False, and skips any
    whose can_upload_to_internet_archive() check fails.
    """
    # timezone-aware bounds of the creation window
    two_days_ago = timezone.now() - timedelta(days=2)
    one_day_ago = timezone.now() - timedelta(days=1)
    creation_window = (two_days_ago, one_day_ago)

    pending_links = Link.objects.filter(
        uploaded_to_internet_archive=False,
        creation_timestamp__range=creation_window,
    )
    for pending in pending_links:
        if not pending.can_upload_to_internet_archive():
            continue
        run_task(upload_to_internet_archive.s(link_guid=pending.guid))
Esempio n. 8
0
    def obj_update(self, bundle, skip_errors=False, **kwargs):
        """
        Update a link, or replace its archived file when a PATCH carries a 'file'.

        File-replacement path (PATCH with 'file'): loads the link, flags the bundle
        with data['replace'], calls obj_delete, re-runs the parent obj_update with
        the link's existing archive_timestamp, defaults 'folder' to the first
        accessible folder containing the link, then calls obj_create.

        Regular path: runs the parent obj_update, moves the link if a folder was
        supplied, dispatches an Internet Archive upload/delete task when privacy
        flips on an archive-eligible link, and adds 'links_remaining' to the data.

        The link's cache is cleared in both cases. Returns the bundle.
        """
        uploaded_file = bundle.data.get('file')
        if uploaded_file and bundle.request.method == 'PATCH':
            # Drop 'request' so it is not forwarded to obj_get/obj_delete/obj_create
            # below. pop() with a default avoids the KeyError that indexing
            # kwargs['request'] raised whenever the key was absent.
            kwargs.pop('request', None)

            bundle.obj = self.obj_get(bundle=bundle, **kwargs)
            bundle.data["replace"] = True

            # delete related cdxlines and captures, delete warc (rename)
            self.obj_delete(bundle=bundle, **kwargs)

            bundle = super(LinkResource, self).obj_update(
                bundle, archive_timestamp=bundle.obj.archive_timestamp)

            # if no 'folder' was supplied for this patch, set folder to current folder before calling obj_create,
            # since obj_create requires a folder to be included
            bundle.data.setdefault(
                'folder',
                Folder.objects.accessible_to(
                    bundle.request.user).filter(links=bundle.obj).first())

            bundle = self.obj_create(bundle=bundle, **kwargs)

        else:
            # capture the privacy state before the parent update overwrites it
            is_private = bundle.obj.is_private
            bundle = super(LinkResource,
                           self).obj_update(bundle, skip_errors, **kwargs)

            if bundle.data.get('folder', None):
                bundle.obj.move_to_folder_for_user(bundle.data['folder'],
                                                   bundle.request.user)

            if 'is_private' in bundle.data:
                if bundle.obj.is_archive_eligible():
                    going_private = bundle.data.get("is_private")
                    # if link was private but has been marked public
                    if is_private and not going_private:
                        run_task(
                            upload_to_internet_archive.s(
                                link_guid=bundle.obj.guid))

                    # if link was public but has been marked private
                    elif not is_private and going_private:
                        run_task(
                            delete_from_internet_archive.s(
                                link_guid=bundle.obj.guid))
            links_remaining = bundle.request.user.get_links_remaining()
            bundle.data['links_remaining'] = links_remaining

        bundle.obj.clear_cache()

        return bundle
Esempio n. 9
0
    def patch(self, request, guid, format=None):
        """
        Update link.

        Applies a partial update to the link identified by `guid`, then optionally
        moves it to a new folder, replaces its archived file with an uploaded one,
        and dispatches an Internet Archive upload/delete task when the privacy
        flag flips on an archive-eligible link.

        Returns a Response holding the serialized link plus 'links_remaining'.
        Raises ValidationError with the serializer errors if the data is invalid.
        """
        link = self.get_object_for_user_by_pk(request.user, guid)

        # remember the privacy state before the serializer overwrites it
        was_private = link.is_private
        data = request.data

        serializer = self.serializer_class(link,
                                           data=data,
                                           partial=True,
                                           context={'request': self.request})
        if not serializer.is_valid():
            raise ValidationError(serializer.errors)
        serializer.save()

        # move to new folder
        folder = AuthenticatedLinkListView.get_folder_from_request(request)
        if folder:
            link.move_to_folder_for_user(folder, request.user)

        # handle file patch
        uploaded_file = request.data.get('file')
        if uploaded_file:

            # delete related cdxlines and captures, delete warc (rename)
            link.delete_related()
            link.safe_delete_warc()

            # write new warc and capture
            link.write_uploaded_file(uploaded_file, cache_break=True)

        # update internet archive if privacy changes
        # NOTE(review): bool() of a form-encoded "false" string is True -- confirm that
        # request.data carries a real boolean here.
        if 'is_private' in data and was_private != bool(
                data.get("is_private")) and link.is_archive_eligible():
            if was_private:
                # link was private but has been marked public
                run_task(upload_to_internet_archive.s(link_guid=link.guid))
            else:
                # link was public but has been marked private
                run_task(
                    delete_from_internet_archive.s(link_guid=link.guid))

        # include remaining links in response
        links_remaining = request.user.get_links_remaining()

        # Build the payload once and add the extra key to that same object.
        # Django REST framework's Serializer.data hands back a new ReturnDict on
        # every access, so a key assigned to `serializer.data[...]` is lost when
        # `serializer.data` is read again for the Response.
        response_data = serializer.data
        response_data['links_remaining'] = links_remaining

        # clear out any caches that might be based on old link data
        link.clear_cache()

        return Response(response_data)
Esempio n. 10
0
    def obj_update(self, bundle, skip_errors=False, **kwargs):
        """
        Update a link, move it if a folder was supplied, and hand a newly vested
        link to the Internet Archive upload task.

        The upload is only dispatched when the link went from not vested to vested
        in this update, settings.UPLOAD_TO_INTERNET_ARCHIVE is on, and the link
        reports can_upload_to_internet_archive(). Returns the bundle.
        """
        # read the vested flag before the parent update changes the object
        vested_before = bundle.obj.vested

        bundle = super(LinkResource, self).obj_update(bundle, skip_errors, **kwargs)

        target_folder = bundle.data.get('folder', None)
        if target_folder:
            bundle.obj.move_to_folder_for_user(target_folder, bundle.request.user)

        newly_vested = not vested_before and bundle.obj.vested
        if newly_vested and settings.UPLOAD_TO_INTERNET_ARCHIVE and bundle.obj.can_upload_to_internet_archive():
            run_task(upload_to_internet_archive, link_guid=bundle.obj.guid)

        return bundle
Esempio n. 11
0
    def obj_delete(self, bundle, **kwargs):
        """
        Safe-delete the bundle's link after an authorization check.

        If bundle.obj has no 'delete' attribute it is looked up via obj_get first;
        NotFound is raised when no match exists. When the link is flagged as
        uploaded to the Internet Archive, a removal task is dispatched as well.
        """
        needs_lookup = not hasattr(bundle.obj, 'delete')
        if needs_lookup:
            try:
                bundle.obj = self.obj_get(bundle=bundle, **kwargs)
            except ObjectDoesNotExist:
                raise NotFound("A model instance matching the provided arguments could not be found.")

        candidates = self.get_object_list(bundle.request)
        self.authorized_delete_detail(candidates, bundle)

        link = bundle.obj
        link.safe_delete()
        link.save()
        if link.uploaded_to_internet_archive:
            run_task(delete_from_internet_archive.s(link_guid=link.guid))
Esempio n. 12
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or an uploaded file.

        The parent obj_create validates and saves the link (raising if invalid);
        an Asset is then built for it. With an uploaded file, the file is stored
        under a normalized name and recorded on the asset. Without one, the
        asset's captures are marked pending and a retrieval task is dispatched.

        Raises ImmediateHttpResponse while settings.READ_ONLY_MODE is on.
        Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': notice},
                'reason': notice,
            }))

        # Runs validation (exception thrown if invalid), sets properties and saves the object
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        asset = Asset(link=bundle.obj)

        incoming_file = bundle.data.get('file')
        if incoming_file:
            # normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
            mime_type = get_mime_type(incoming_file.name)
            new_extension = mime_type_lookup[mime_type]['new_extension']
            storage_path = os.path.join(asset.base_storage_path, 'upload.%s' % new_extension)

            # rewind before handing the file to storage
            incoming_file.file.seek(0)
            saved_name = default_storage.store_file(incoming_file, storage_path)

            is_pdf = mime_type == 'application/pdf'
            if is_pdf:
                asset.pdf_capture = saved_name
            else:
                asset.image_capture = saved_name
            asset.user_upload = True
            asset.user_upload_file_name = incoming_file.name
            asset.save()
            return bundle

        asset.image_capture = Asset.CAPTURE_STATUS_PENDING
        if asset.link.media_type == 'pdf':
            # looks like a PDF: only run the PDF retrieval tool
            asset.pdf_capture = Asset.CAPTURE_STATUS_PENDING
            task = get_pdf
        else:
            # not a PDF: retrieve what we can through the proxy capture
            asset.warc_capture = Asset.CAPTURE_STATUS_PENDING
            task = proxy_capture

        asset.save()
        task_args = (
            asset.link.guid,
            asset.link.submitted_url,
            asset.base_storage_path,
            bundle.request.META.get('HTTP_USER_AGENT', ''),
        )
        run_task(task.s(*task_args))

        return bundle
Esempio n. 13
0
def run_next_capture():
    """
    Grab and run the next CaptureJob. This will keep calling itself until there are no jobs left.

    A failing capture is reported on stdout/stderr and does not stop the queue.
    Whatever happens, captures of the job's link still marked 'pending' are
    switched to 'failed', and a job still 'pending' is completed as 'failed'.
    """
    capture_job = CaptureJob.get_next_job(reserve=True)
    if not capture_job:
        return  # no jobs waiting
    try:
        proxy_capture(capture_job)
    except Exception:
        # Swallow ordinary errors so one bad capture doesn't stall the queue.
        # A bare `except:` would also trap KeyboardInterrupt/SystemExit; those
        # now propagate once the cleanup below has run.
        # The parenthesised single-argument print parses on Python 2 and 3 alike.
        print("Exception while processing capture job %s:" % capture_job.link_id)
        traceback.print_exc()
    finally:
        # anything left pending at this point did not finish
        capture_job.link.captures.filter(status='pending').update(status='failed')
        if capture_job.status == 'pending':
            capture_job.mark_completed('failed')
    run_task(run_next_capture.s())
Esempio n. 14
0
    def obj_update(self, bundle, skip_errors=False, **kwargs):
        """
        Run the parent update, relocate the link when a folder is supplied, and
        dispatch an Internet Archive upload the first time the link becomes vested.

        The upload requires settings.UPLOAD_TO_INTERNET_ARCHIVE to be on and the
        link to report can_upload_to_internet_archive(). Returns the bundle.
        """
        # snapshot taken before the parent update modifies the object
        previously_vested = bundle.obj.vested

        bundle = super(LinkResource, self).obj_update(bundle, skip_errors, **kwargs)
        link = bundle.obj

        destination = bundle.data.get('folder', None)
        if destination:
            link.move_to_folder_for_user(destination, bundle.request.user)

        if previously_vested or not link.vested:
            # vested state did not go from False to True; nothing to upload
            return bundle

        if settings.UPLOAD_TO_INTERNET_ARCHIVE and link.can_upload_to_internet_archive():
            run_task(upload_to_internet_archive, link_guid=link.guid)

        return bundle
Esempio n. 15
0
def run_next_capture():
    """
    Grab and run the next CaptureJob. This will keep calling itself until there are no jobs left.

    A failing capture is reported on stdout/stderr and does not stop the queue.
    Whatever happens, captures of the job's link still marked 'pending' are
    switched to 'failed', and a job still 'pending' is completed as 'failed'.
    """
    capture_job = CaptureJob.get_next_job(reserve=True)
    if not capture_job:
        return  # no jobs waiting
    try:
        proxy_capture(capture_job)
    except Exception:
        # Swallow ordinary errors so one bad capture doesn't stall the queue.
        # A bare `except:` would also trap KeyboardInterrupt/SystemExit; those
        # now propagate once the cleanup below has run.
        # The parenthesised single-argument print parses on Python 2 and 3 alike.
        print("Exception while processing capture job %s:" % capture_job.link_id)
        traceback.print_exc()
    finally:
        # anything left pending at this point did not finish
        capture_job.link.captures.filter(status='pending').update(
            status='failed')
        if capture_job.status == 'pending':
            capture_job.mark_completed('failed')
    run_task(run_next_capture.s())
Esempio n. 16
0
    def obj_update(self, bundle, skip_errors=False, **kwargs):
        """
        Update a link, or replace its archived file when a PATCH carries a 'file'.

        File-replacement path (PATCH with 'file'): loads the link, flags the bundle
        with data['replace'], calls obj_delete, re-runs the parent obj_update with
        the link's existing archive_timestamp, defaults 'folder' to the first
        accessible folder containing the link, then calls obj_create.

        Regular path: runs the parent obj_update, moves the link if a folder was
        supplied, dispatches an Internet Archive upload/delete task when privacy
        flips on an archive-eligible link, and adds 'links_remaining' to the data.

        The link's cache is cleared in both cases. Returns the bundle.
        """
        uploaded_file = bundle.data.get('file')
        if uploaded_file and bundle.request.method == 'PATCH':
            # Drop 'request' so it is not forwarded to obj_get/obj_delete/obj_create
            # below. pop() with a default avoids the KeyError that indexing
            # kwargs['request'] raised whenever the key was absent.
            kwargs.pop('request', None)

            bundle.obj = self.obj_get(bundle=bundle, **kwargs)
            bundle.data["replace"] = True

            # delete related cdxlines and captures, delete warc (rename)
            self.obj_delete(bundle=bundle, **kwargs)

            bundle = super(LinkResource, self).obj_update(bundle, archive_timestamp=bundle.obj.archive_timestamp)

            # if no 'folder' was supplied for this patch, set folder to current folder before calling obj_create,
            # since obj_create requires a folder to be included
            bundle.data.setdefault('folder', Folder.objects.accessible_to(bundle.request.user).filter(links=bundle.obj).first())

            bundle = self.obj_create(bundle=bundle, **kwargs)

        else:
            # capture the privacy state before the parent update overwrites it
            is_private = bundle.obj.is_private
            bundle = super(LinkResource, self).obj_update(bundle, skip_errors, **kwargs)

            if bundle.data.get('folder', None):
                bundle.obj.move_to_folder_for_user(bundle.data['folder'], bundle.request.user)

            if 'is_private' in bundle.data:
                if bundle.obj.is_archive_eligible():
                    going_private = bundle.data.get("is_private")
                    # if link was private but has been marked public
                    if is_private and not going_private:
                        run_task(upload_to_internet_archive.s(link_guid=bundle.obj.guid))

                    # if link was public but has been marked private
                    elif not is_private and going_private:
                        run_task(delete_from_internet_archive.s(link_guid=bundle.obj.guid))
            links_remaining = bundle.request.user.get_links_remaining()
            bundle.data['links_remaining'] = links_remaining

        bundle.obj.clear_cache()

        return bundle
Esempio n. 17
0
    def obj_delete(self, bundle, **kwargs):
        """
        Delete a link, or only retire its warc when a file is being replaced.

        If bundle.obj has no 'delete' attribute it is looked up via obj_get first;
        NotFound is raised when no match exists. After the authorization check,
        related captures and cdxlines are removed. A removal task is dispatched
        when the link's Internet Archive upload status is 'completed'.
        """
        if not hasattr(bundle.obj, 'delete'):
            try:
                bundle.obj = self.obj_get(bundle=bundle, **kwargs)
            except ObjectDoesNotExist:
                raise NotFound("A model instance matching the provided arguments could not be found.")

        object_list = self.get_object_list(bundle.request)
        self.authorized_delete_detail(object_list, bundle)

        link = bundle.obj

        # deleting related captures and cdxlines
        link.delete_related()

        # if replacing file, only "delete" warc by renaming
        replacing_file = bundle.data.get("replace")
        remove = link.safe_delete_warc if replacing_file else link.safe_delete
        remove()

        link.save()
        if link.internet_archive_upload_status == 'completed':
            run_task(delete_from_internet_archive.s(link_guid=link.guid))
Esempio n. 18
0
    def obj_update(self, bundle, skip_errors=False, **kwargs):
        """
        Update a link and react to folder and privacy changes.

        After the parent update, the link is moved if a folder was supplied. If
        'is_private' was submitted and the link is archive eligible, a
        private-to-public change dispatches an Internet Archive upload and a
        public-to-private change dispatches a deletion. The user's remaining
        links are added to bundle.data. Returns the bundle.
        """
        # privacy state before the parent update overwrites it
        was_private = bundle.obj.is_private
        bundle = super(LinkResource, self).obj_update(bundle, skip_errors, **kwargs)
        link = bundle.obj

        new_folder = bundle.data.get('folder', None)
        if new_folder:
            link.move_to_folder_for_user(new_folder, bundle.request.user)

        if 'is_private' in bundle.data and link.is_archive_eligible():
            going_private = bundle.data.get("is_private")
            if was_private and not going_private:
                # link was private but has been marked public
                run_task(upload_to_internet_archive.s(link_guid=link.guid))
            elif going_private and not was_private:
                # link was public but has been marked private
                run_task(delete_from_internet_archive.s(link_guid=link.guid))

        bundle.data['links_remaining'] = bundle.request.user.get_links_remaining()
        return bundle
Esempio n. 19
0
    def obj_delete(self, bundle, **kwargs):
        """
        Remove a link's related records and delete it, or just its warc on replace.

        bundle.obj is fetched through obj_get when it has no 'delete' attribute
        (NotFound is raised if nothing matches). Deletion is authorized before
        anything is touched. A link whose Internet Archive upload status is
        'completed' also gets a removal task dispatched.
        """
        has_instance = hasattr(bundle.obj, 'delete')
        if not has_instance:
            try:
                bundle.obj = self.obj_get(bundle=bundle, **kwargs)
            except ObjectDoesNotExist:
                raise NotFound("A model instance matching the provided arguments could not be found.")

        self.authorized_delete_detail(self.get_object_list(bundle.request), bundle)

        target = bundle.obj

        # deleting related captures and cdxlines
        target.delete_related()

        if not bundle.data.get("replace"):
            target.safe_delete()
        else:
            # if replacing file, only "delete" warc by renaming
            target.safe_delete_warc()

        target.save()

        upload_completed = target.internet_archive_upload_status == 'completed'
        if upload_completed:
            run_task(delete_from_internet_archive.s(link_guid=target.guid))
Esempio n. 20
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or an uploaded file.

        The parent obj_create validates and saves the link (raising if invalid).
        With an uploaded file, a successful primary Capture is written as a warc
        resource record. Without one, pending primary and screenshot Captures are
        saved as placeholders and the proxy_capture task is dispatched.

        Raises ImmediateHttpResponse while settings.READ_ONLY_MODE is on.
        Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            paused = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            payload = {
                "archives": {"__all__": paused},
                "reason": paused,
            }
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, payload))

        # Runs validation (exception thrown if invalid), sets properties and saves the object
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        link = bundle.obj

        upload = bundle.data.get("file")
        if not upload:
            # create primary capture placeholder
            primary = Capture(
                link=link,
                role="primary",
                status="pending",
                record_type="response",
                url=link.submitted_url,
            )
            primary.save()

            # create screenshot placeholder
            screenshot = Capture(
                link=link,
                role="screenshot",
                status="pending",
                record_type="resource",
                url="file:///%s/cap.png" % link.guid,
                content_type="image/png",
            )
            screenshot.save()

            # kick off capture task
            user_agent = bundle.request.META.get("HTTP_USER_AGENT", "")
            run_task(proxy_capture.s(link.guid, user_agent))
            return bundle

        # normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
        mime_type = get_mime_type(upload.name)
        file_name = "upload.%s" % mime_type_lookup[mime_type]["new_extension"]
        warc_url = "file:///%s/%s" % (link.guid, file_name)

        capture = Capture(
            link=link,
            role="primary",
            status="success",
            record_type="resource",
            user_upload="True",
            content_type=mime_type,
            url=warc_url,
        )

        # rewind the upload before writing it into the warc
        upload.file.seek(0)
        capture.write_warc_resource_record(upload)
        capture.save()

        return bundle
Esempio n. 21
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or an uploaded file, including replacements.

        Unless bundle.data['replace'] is set, the parent obj_create validates and
        saves the link (raising if invalid). The link is then placed in the
        requested folder and made private if the folder's organization defaults
        to private. An uploaded file becomes a successful primary Capture written
        as a warc resource record. Otherwise pending placeholder Captures and a
        CaptureJob are saved and run_next_capture is dispatched.

        Raises ImmediateHttpResponse while settings.READ_ONLY_MODE is on.
        Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            paused = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(
                bundle.request,
                {
                    'archives': {'__all__': paused},
                    'reason': paused,
                }))

        # Runs validation (exception thrown if invalid), sets properties and saves the object
        if not bundle.data.get('replace'):
            bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)

        link = bundle.obj
        link.save()

        # put link in folder and handle Org settings based on folder
        folder = bundle.data.get('folder')
        organization = folder.organization
        if organization and organization.default_to_private:
            link.is_private = True
            link.save()
        # also sets link.organization
        link.move_to_folder_for_user(folder, bundle.request.user)

        upload = bundle.data.get('file')
        if not upload:
            # create primary capture placeholder
            primary = Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            )
            primary.save()

            # create screenshot placeholder
            screenshot = Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            )
            screenshot.save()

            # create CaptureJob
            job = CaptureJob(link=link, human=bundle.data.get('human', False))
            job.save()

            # kick off capture tasks -- no need for guid since it'll work through the queue
            run_task(run_next_capture.s())
            return bundle

        # normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
        mime_type = get_mime_type(upload.name)
        file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']

        base_warc_url = "file:///%s/%s" % (link.guid, file_name)

        # only append a random number to warc_url if we're replacing a file
        if bundle.data.get('replace'):
            version = str(random.random()).replace('.', '')
            warc_url = "%s?version=%s" % (base_warc_url, version)
        else:
            warc_url = base_warc_url

        capture = Capture(
            link=link,
            role='primary',
            status='success',
            record_type='resource',
            user_upload='True',
            content_type=mime_type,
            url=warc_url,
        )

        # rewind the upload before writing it into the warc
        upload.file.seek(0)
        capture.write_warc_resource_record(upload)
        capture.save()

        return bundle
Esempio n. 22
0
    def post(self, request, format=None):
        """
        Create new link.

        A CaptureJob is saved up front so that every failure path can be
        reported against it through raise_invalid_capture_job. The target folder
        is resolved, the user's or registrar's link allowance is checked, and
        the link is created from the validated data. An uploaded file is
        written straight to the link. Otherwise placeholder captures are saved,
        the job is attached to the link and run_next_capture is dispatched.

        Returns a 201 Response with the serialized link on success.
        """
        data = request.data
        capture_job = CaptureJob(
            human=data.get('human', False),
            submitted_url=data.get('url', ''),
            created_by=request.user,
        )
        # Batch is set directly on the request object by the LinkBatch api,
        # to prevent abuse of this feature by those POSTing directly to this route.
        if settings.ENABLE_BATCH_LINKS and getattr(request, 'batch', None):
            capture_job.link_batch = LinkBatch.objects.get(id=request.batch)
        capture_job.save()

        # Set target folder, in order of preference:
        # - 'folder' key in data
        # - parent folder, if posting to /folders/:parent_id/archives
        # - user's personal folder
        try:
            folder = self.get_folder_from_request(request) or request.parent or request.user.root_folder
        except ValidationError as e:
            raise_invalid_capture_job(capture_job, e.detail)

        # Make sure a limited user has links left to create
        if folder.organization:
            registrar = folder.organization.registrar
            if not registrar.link_creation_allowed():
                error = 'Perma.cc cannot presently make links on behalf of {}. '.format(registrar.name)
                if request.user.registrar:
                    contact = 'Visit your settings for subscription information.'
                else:
                    contact_emails = ", ".join(
                        [user.email for user in registrar.active_registrar_users()])
                    contact = 'For assistance with your subscription, contact:  {}.'.format(contact_emails)
                raise_invalid_capture_job(capture_job, error + contact)
        else:
            if not request.user.link_creation_allowed():
                if request.user.nonpaying:
                    raise_invalid_capture_job(capture_job, "You've already reached your limit.")
                error = "Perma.cc cannot presently make additional Perma Links on your behalf. Visit your subscription settings page for more information."
                raise_invalid_capture_job(capture_job, error)

        serializer = self.serializer_class(data=data, context={'request': request})
        if serializer.is_valid():

            link = serializer.save(created_by=request.user)

            # put link in folder and handle Org settings based on folder
            if folder.organization and folder.organization.default_to_private:
                link.is_private = True
                link.save()
            # also sets link.organization
            link.move_to_folder_for_user(folder, request.user)

            upload = request.data.get('file')
            if upload:
                # handle uploaded file
                link.write_uploaded_file(upload)
                link.warc_size = default_storage.size(link.warc_storage_file())
                link.save()
            else:
                # handle submitted url

                # create primary capture placeholder
                primary = Capture(
                    link=link,
                    role='primary',
                    status='pending',
                    record_type='response',
                    url=link.submitted_url,
                )
                primary.save()

                # create screenshot placeholder
                screenshot = Capture(
                    link=link,
                    role='screenshot',
                    status='pending',
                    record_type='resource',
                    url="file:///%s/cap.png" % link.guid,
                    content_type='image/png',
                )
                screenshot.save()

                # kick off capture tasks -- no need for guid since it'll work through the queue
                capture_job.status = 'pending'
                capture_job.link = link
                capture_job.save(update_fields=['status', 'link'])
                run_task(run_next_capture.s())

            return Response(serializer.data, status=status.HTTP_201_CREATED)

        raise_invalid_capture_job(capture_job, serializer.errors)
Esempio n. 23
0
def do_update_perma(request, guid):
    """Hand ``update_perma`` to ``run_task`` for link ``guid`` and answer "OK"."""
    task_arguments = {'link_guid': guid}
    run_task(update_perma, **task_arguments)
    return HttpResponse("OK")
Esempio n. 24
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or to store an uploaded file.

        Unless the request data carries a truthy 'replace' flag, the parent
        ``obj_create`` is called to validate the bundle and create the link.
        The link is then moved into the folder given in the request data.
        After that, one of two things happens:

        - a file was uploaded: it is written as a finished primary capture
          (with a random ``?version=`` suffix on the URL when replacing);
        - no file: pending primary and screenshot placeholder captures and a
          CaptureJob are saved, and the capture queue is kicked off.

        Raises ImmediateHttpResponse when settings.READ_ONLY_MODE is set.
        Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': maintenance_notice},
                'reason': maintenance_notice,
            }))

        # Runs validation (exception thrown if invalid), sets properties and saves the object.
        # Skipped entirely for 'replace' requests.
        if not bundle.data.get('replace'):
            bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)

        link = bundle.obj
        link.save()

        # File the link; an organization's folder may force it to be private.
        target_folder = bundle.data.get('folder')
        organization = target_folder.organization
        if organization and organization.default_to_private:
            link.is_private = True
            link.save()
        link.move_to_folder_for_user(target_folder, bundle.request.user)  # also sets link.organization

        uploaded_file = bundle.data.get('file')
        if not uploaded_file:
            # No upload: record what still has to be captured ...
            primary_placeholder = Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            )
            primary_placeholder.save()

            screenshot_placeholder = Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            )
            screenshot_placeholder.save()

            # ... queue the job ...
            capture_job = CaptureJob(link=link, human=bundle.data.get('human', False))
            capture_job.save()

            # ... and start the queue -- no need for guid since it'll work through the queue.
            run_task(run_next_capture.s())
            return bundle

        # Upload: normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
        mime_type = get_mime_type(uploaded_file.name)
        file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
        warc_url = "file:///%s/%s" % (link.guid, file_name)

        # Only append a random number to warc_url if we're replacing a file.
        if bundle.data.get('replace'):
            version_token = str(random.random()).replace('.', '')
            warc_url = "%s?version=%s" % (warc_url, version_token)

        capture = Capture(
            link=link,
            role='primary',
            status='success',
            record_type='resource',
            user_upload='True',
            content_type=mime_type,
            url=warc_url,
        )

        # Rewind before writing so the whole upload ends up in the record.
        uploaded_file.file.seek(0)
        capture.write_warc_resource_record(uploaded_file)
        capture.save()

        return bundle
Esempio n. 25
0
    def post(self, request, format=None):
        """
        Create a new link.

        Resolves the target folder, enforces the link allowance (the user's
        own for a folder without an organization, the registrar's otherwise),
        validates the payload with the serializer, and then either stores the
        uploaded file or creates pending captures plus a CaptureJob and kicks
        off the capture queue.

        Returns a 201 response with the serialized link, or a 400 response
        with the serializer errors when validation fails.
        """
        data = request.data

        # Set target folder, in order of preference:
        # - 'folder' key in data
        # - parent folder, if posting to /folders/:parent_id/archives
        # - user's personal folder
        folder = self.get_folder_from_request(request) or request.parent or request.user.root_folder

        organization = folder.organization
        if organization:
            # Organization folder: the registrar must be allowed to create links.
            registrar = organization.registrar
            if not registrar.link_creation_allowed():
                error = 'Perma.cc cannot presently make links on behalf of {}. '.format(registrar.name)
                if request.user.registrar:
                    contact = 'Visit your settings for subscription information.'
                else:
                    contact_emails = ", ".join(
                        user.email for user in registrar.active_registrar_users()
                    )
                    contact = 'For assistance with your subscription, contact:  {}.'.format(contact_emails)
                raise_validation_error(error + contact)
        elif request.user.get_links_remaining() < 1:
            # Personal folder: make sure a limited user has links left to create.
            raise_validation_error("You've already reached your limit.")

        serializer = self.serializer_class(data=data, context={'request': request})
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        link = serializer.save(created_by=request.user)

        # put link in folder and handle Org settings based on folder
        if organization and organization.default_to_private:
            link.is_private = True
            link.save()
        link.move_to_folder_for_user(folder, request.user)  # also sets link.organization

        uploaded_file = data.get('file')
        if uploaded_file:
            # handle uploaded file
            link.write_uploaded_file(uploaded_file)
        else:
            # handle submitted url: placeholders for the primary capture and the screenshot
            primary_placeholder = Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            )
            primary_placeholder.save()

            screenshot_placeholder = Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            )
            screenshot_placeholder.save()

            # create CaptureJob
            job = CaptureJob(link=link, human=data.get('human', False))
            job.save()

            # kick off capture tasks -- no need for guid since it'll work through the queue
            run_task(run_next_capture.s())

        return Response(serializer.data, status=status.HTTP_201_CREATED)
Esempio n. 26
0
    def post(self, request, format=None):
        """
        Create a new link, tracking the attempt with a CaptureJob.

        A CaptureJob is saved before any validation happens; every failure
        path below goes through ``raise_invalid_capture_job`` with that job.
        On success the job is attached to the new link and marked 'pending'
        (URL submissions only), and a 201 response with the serialized link
        is returned.
        """
        data = request.data

        capture_job = CaptureJob(
            human=data.get('human', False),
            submitted_url=data.get('url', ''),
            created_by=request.user,
        )
        # Batch is set directly on the request object by the LinkBatch api,
        # to prevent abuse of this feature by those POSTing directly to this route.
        if settings.ENABLE_BATCH_LINKS and getattr(request, 'batch', None):
            capture_job.link_batch = LinkBatch.objects.get(id=request.batch)
        capture_job.save()

        # Set target folder, in order of preference:
        # - 'folder' key in data
        # - parent folder, if posting to /folders/:parent_id/archives
        # - user's personal folder
        try:
            folder = self.get_folder_from_request(request) or request.parent or request.user.root_folder
        except ValidationError as folder_error:
            raise_invalid_capture_job(capture_job, folder_error.detail)

        organization = folder.organization
        if organization:
            # Organization folder: the registrar must be allowed to create links.
            registrar = organization.registrar
            if not registrar.link_creation_allowed():
                error = 'Perma.cc cannot presently make links on behalf of {}. '.format(registrar.name)
                if request.user.registrar:
                    contact = 'Visit your settings for subscription information.'
                else:
                    contact_emails = ", ".join(
                        user.email for user in registrar.active_registrar_users()
                    )
                    contact = 'For assistance with your subscription, contact:  {}.'.format(contact_emails)
                raise_invalid_capture_job(capture_job, error + contact)
        elif not request.user.link_creation_allowed():
            # Personal folder: make sure a limited user has links left to create.
            if request.user.nonpaying:
                raise_invalid_capture_job(capture_job, "You've already reached your limit.")
            raise_invalid_capture_job(
                capture_job,
                "Perma.cc cannot presently make additional Perma Links on your behalf. Visit your subscription settings page for more information.",
            )

        serializer = self.serializer_class(data=data, context={'request': request})
        if serializer.is_valid():

            link = serializer.save(created_by=request.user)

            # put link in folder and handle Org settings based on folder
            if organization and organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(folder, request.user)  # also sets link.organization

            uploaded_file = data.get('file')
            if uploaded_file:
                # handle uploaded file
                link.write_uploaded_file(uploaded_file)
            else:
                # handle submitted url: placeholders for the primary capture and the screenshot
                primary_placeholder = Capture(
                    link=link,
                    role='primary',
                    status='pending',
                    record_type='response',
                    url=link.submitted_url,
                )
                primary_placeholder.save()

                screenshot_placeholder = Capture(
                    link=link,
                    role='screenshot',
                    status='pending',
                    record_type='resource',
                    url="file:///%s/cap.png" % link.guid,
                    content_type='image/png',
                )
                screenshot_placeholder.save()

                # kick off capture tasks -- no need for guid since it'll work through the queue
                capture_job.status = 'pending'
                capture_job.link = link
                capture_job.save(update_fields=['status', 'link'])
                run_task(run_next_capture.s())

            return Response(serializer.data, status=status.HTTP_201_CREATED)

        raise_invalid_capture_job(capture_job, serializer.errors)
Esempio n. 27
0
def update_perma(link_guid):
    """
    Update the vested/darchived status of a perma link, and download the
    assets if necessary.

    The link's status metadata is fetched from the upstream server. If no
    local Link exists for ``link_guid``, the link's asset archive is
    downloaded, its files are copied into ``default_storage``, and Link and
    Asset objects are created from the downloaded metadata. In either case
    the Link's ``dark_archived`` and ``vested`` flags are then refreshed, and
    downstream mirrors (if configured) are poked.

    Raises ``requests.HTTPError`` if the upstream server answers either
    request with an error status, instead of failing later on an unparsable
    body.
    """
    # N.B. This function has two instances of downloading stuff from
    # the root server using a scheme that looks something like
    #    settings.SERVER + reverse("url_pattern")
    # This is nice because it means we don't have to repeat our URL
    # patterns from urls.py, but it hardcodes the fact that the root
    # server is another Perma instance. It's unclear to me which is a
    # better fact to abstract, but this is easier for now.
    upstream_address = settings.UPSTREAM_SERVER['address']
    upstream_headers = settings.UPSTREAM_SERVER.get('headers', {})

    ## First, let's get the metadata for this link. The metadata
    ## contains information about where we should place the assets (if
    ## we decide that we need them). This is also a fast check to make
    ## sure the link GUID is actually real.
    metadata_url = upstream_address + reverse("service_link_status", args=(link_guid,))
    metadata_response = requests.get(metadata_url, headers=upstream_headers)
    # Fail here on an error status rather than on a confusing JSON/KeyError later.
    metadata_response.raise_for_status()
    metadata = metadata_response.json()

    ## Next, let's see if we need to get the assets. If we have the
    ## Link object for this GUID, we're going to assume we already
    ## have what we need. It would make a little more sense to use the
    ## Asset object here instead, but we're definitely going to need
    ## to do stuff to the Link object so we might as well get that
    ## instead. In practice they should be ~one to one.
    try:
        link = Link.objects.get(guid=link_guid)
    except Link.DoesNotExist:
        ## We need to download the assets. We can download an archive
        ## from the assets server.
        assets_url = upstream_address + reverse("mirroring:link_assets", args=(link_guid,))

        # Temp paths can be relative because we're in run_in_tempdir()
        temp_zip_path = 'temp.zip'

        # Save remote zip file to disk, using streaming to avoid keeping large files in RAM.
        assets_response = requests.get(assets_url, headers=upstream_headers, stream=True)
        try:
            # Don't write an upstream error page to disk as if it were the archive.
            assets_response.raise_for_status()
            with open(temp_zip_path, 'wb') as f:
                for chunk in assets_response.iter_content(1024):
                    f.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            assets_response.close()

        ## Extract the archive and change into the extracted folder.
        with zipfile.ZipFile(temp_zip_path, "r") as zipfh:
            zipfh.extractall() # creates folder named [guid] in current temp dir
        temp_extracted_path = os.path.basename(metadata['path']) # e.g. "1234-ABCD"

        # Save all extracted files to default_storage, using the path in metadata.
        storage_parent = os.path.dirname(metadata['path'])
        for root, _dirs, file_names in os.walk(temp_extracted_path):
            for file_name in file_names:
                source_file_path = os.path.join(root, file_name) # e.g. "1234-ABCD/cap.png"
                dest_file_path = os.path.join(storage_parent, source_file_path) # e.g. 2014/6/10/18/37/1234-ABCD/cap.png
                with open(source_file_path, 'rb') as source_file:
                    default_storage.store_file(source_file, dest_file_path)

        ## We can now get some additional metadata that we'll need to
        ## create the Link object.
        with open(os.path.join(temp_extracted_path, "metadata.json"), "r") as fh:
            link_metadata = json.load(fh)

        ## We now have everything we need to initialize the Link object.
        link = Link(guid=link_guid)
        link.submitted_url = link_metadata["submitted_url"]
        link.submitted_title = link_metadata["submitted_title"]
        link.created_by = None # XXX maybe we should do something with FakeUser here
        link.save(pregenerated_guid=True) # We need to save this so that we can create an Asset object

        # This is a stupid hack to overcome the fact that the Link has
        # auto_now_add=True, so it's always going to be saved to the
        # current time on first creation.
        link.creation_timestamp = unserialize_datetime(link_metadata["creation_timestamp"])
        link.save()

        ## Lastly, let's create an Asset object for this Link.
        asset = Asset(link=link)
        asset.base_storage_path = metadata["path"]
        asset.image_capture = metadata["image_capture"]
        asset.warc_capture = metadata["source_capture"]
        asset.pdf_capture = metadata["pdf_capture"]
        asset.text_capture = metadata["text_capture"]
        asset.save()

    ## We can now add some of the data we got from the metadata to the Link object
    link.dark_archived = metadata["dark_archived"]
    link.vested = metadata["vested"]
    link.save()

    # If we have sub-mirrors, poke them to get a copy from us.
    if settings.DOWNSTREAM_SERVERS:
        run_task(poke_mirrors, link_guid=link_guid)
Esempio n. 28
0
    def post(self, request, format=None):
        """
        Create a new link.

        Picks the target folder, checks that link creation is still allowed
        for that folder (per-user allowance for folders without an
        organization, registrar status otherwise) and validates the payload.
        An uploaded file is written straight to the link; for a submitted URL
        pending captures and a CaptureJob are saved and the capture queue is
        started.

        Responds with 201 and the serialized link, or with 400 and the
        serializer errors if the payload is invalid.
        """
        payload = request.data

        # Set target folder, in order of preference:
        # - 'folder' key in data
        # - parent folder, if posting to /folders/:parent_id/archives
        # - user's personal folder
        folder = (self.get_folder_from_request(request)
                  or request.parent
                  or request.user.root_folder)

        if not folder.organization:
            # Make sure a limited user has links left to create
            if request.user.get_links_remaining() < 1:
                raise_validation_error("You've already reached your limit.")
        else:
            registrar = folder.organization.registrar
            if not registrar.link_creation_allowed():
                if request.user.registrar:
                    contact = 'Visit your settings for subscription information.'
                else:
                    emails = [member.email for member in registrar.active_registrar_users()]
                    contact = 'For assistance with your subscription, contact:  {}.'.format(", ".join(emails))
                error = 'Perma.cc cannot presently make links on behalf of {}. '.format(registrar.name)
                raise_validation_error(error + contact)

        serializer = self.serializer_class(data=payload, context={'request': request})
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        link = serializer.save(created_by=request.user)

        # put link in folder and handle Org settings based on folder
        if folder.organization and folder.organization.default_to_private:
            link.is_private = True
            link.save()
        link.move_to_folder_for_user(folder, request.user)  # also sets link.organization

        uploaded_file = payload.get('file')
        if uploaded_file:
            # handle uploaded file
            link.write_uploaded_file(uploaded_file)
            return Response(serializer.data, status=status.HTTP_201_CREATED)

        # handle submitted url: create primary capture placeholder
        primary = Capture(
            link=link,
            role='primary',
            status='pending',
            record_type='response',
            url=link.submitted_url,
        )
        primary.save()

        # create screenshot placeholder
        screenshot = Capture(
            link=link,
            role='screenshot',
            status='pending',
            record_type='resource',
            url="file:///%s/cap.png" % link.guid,
            content_type='image/png',
        )
        screenshot.save()

        # create CaptureJob
        human = payload.get('human', False)
        CaptureJob(link=link, human=human).save()

        # kick off capture tasks -- no need for guid since it'll work through the queue
        run_task(run_next_capture.s())

        return Response(serializer.data, status=status.HTTP_201_CREATED)
Esempio n. 29
0
    def obj_create(self, bundle, **kwargs):
        """
        Handle a request to archive a URL or to store an uploaded file.

        Checks read-only mode and the user's remaining link allowance, stores
        the post-creation allowance in ``bundle.data['links_remaining']``,
        and lets the parent ``obj_create`` validate and create the link. The
        link is moved into the requested folder when one is given. An
        uploaded file becomes a finished primary capture; otherwise pending
        placeholder captures are saved and a ``proxy_capture`` task is
        started with the request's user agent.

        Raises ImmediateHttpResponse in read-only mode or when the limit has
        been reached. Returns the bundle.
        """
        if settings.READ_ONLY_MODE:
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': maintenance_notice},
                'reason': maintenance_notice,
            }))

        # Make sure a limited user has links left to create
        requesting_user = bundle.request.user
        links_remaining = requesting_user.get_links_remaining()
        if (requesting_user.has_limit() or not bundle.data.get('organization')) and links_remaining < 1:
            limit_notice = "You've already reached your limit."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': limit_notice},
                'reason': limit_notice,
            }))

        # Return the number remaining links after this one is created
        if requesting_user.has_limit() or not bundle.data.get('organization'):
            remaining_after = links_remaining - 1
        else:
            remaining_after = 'unlimited'
        bundle.data['links_remaining'] = remaining_after

        # Runs validation (exception thrown if invalid), sets properties and saves the object
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        link = bundle.obj

        # put link in folder and handle Org settings based on folder
        target_folder = bundle.data.get('folder')
        if target_folder:
            organization = target_folder.organization
            if organization and organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(target_folder, bundle.request.user)  # also sets link.organization

        uploaded_file = bundle.data.get('file')
        if not uploaded_file:
            # create primary capture placeholder
            primary_placeholder = Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            )
            primary_placeholder.save()

            # create screenshot placeholder
            screenshot_placeholder = Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            )
            screenshot_placeholder.save()

            # kick off capture task
            user_agent = bundle.request.META.get('HTTP_USER_AGENT', '')
            run_task(proxy_capture.s(link.guid, user_agent))
            return bundle

        # normalize file name to upload.jpg, upload.png, upload.gif, or upload.pdf
        mime_type = get_mime_type(uploaded_file.name)
        file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
        warc_url = "file:///%s/%s" % (link.guid, file_name)

        capture = Capture(
            link=link,
            role='primary',
            status='success',
            record_type='resource',
            user_upload='True',
            content_type=mime_type,
            url=warc_url,
        )

        # Rewind before writing so the whole upload ends up in the record.
        uploaded_file.file.seek(0)
        capture.write_warc_resource_record(uploaded_file)
        capture.save()

        return bundle
Esempio n. 30
0
def do_update_perma(request, guid):
    """Pass ``update_perma`` for link ``guid`` to ``run_task``, then reply "OK"."""
    run_task(update_perma, **{'link_guid': guid})
    ok_response = HttpResponse("OK")
    return ok_response
Esempio n. 31
0
    def post(self, request, format=None):
        """
        Create new link.

        Steps, in order:

        1. Save a CaptureJob describing the attempt (human flag, submitted
           url, creating user, optional link batch). Every failure below is
           reported through ``raise_invalid_capture_job`` with this job.
        2. Resolve the target folder and reject the sponsored root folder.
        3. Check that link creation is allowed: the user's own allowance for
           personal folders, otherwise the sponsoring registrar's or the
           organization's registrar's status and the folder's read-only flag.
        4. Validate the payload and save the link inside a transaction that
           locks the user's row, consuming a bonus link when that is all the
           user has left.
        5. File the link in the folder, then either write the uploaded file
           or create pending captures and queue the capture job.

        Returns a 201 response with the serialized link on success.
        """
        data = request.data
        # Created and saved before any validation, so that failed attempts
        # still have a job to attach the error to.
        capture_job = CaptureJob(
            human=request.data.get('human', False),
            submitted_url=request.data.get('url', ''),
            created_by=request.user
        )
        if settings.ENABLE_BATCH_LINKS:
            # Batch is set directly on the request object by the LinkBatch api,
            # to prevent abuse of this feature by those POSTing directly to this route.
            if getattr(request, 'batch', None):
                capture_job.link_batch = LinkBatch.objects.get(id=request.batch)
        capture_job.save()


        # Set target folder, in order of preference:
        # - 'folder' key in data
        # - parent folder, if posting to /folders/:parent_id/archives
        # - user's personal folder
        try:
            folder = self.get_folder_from_request(request) or request.parent or request.user.root_folder
        except ValidationError as e:
            # NOTE(review): the code below uses `folder` unconditionally, so this
            # helper is presumably expected to raise -- confirm against its definition.
            raise_invalid_capture_job(capture_job, e.detail)

        # Disallow creation of links in top-level sponsored folder
        if folder.is_sponsored_root_folder:
            error = "You can't make links directly in your Sponsored Links folder. Select a folder belonging to a sponsor."
            raise_invalid_capture_job(capture_job, error)

        # Make sure a limited user has links left to create
        if not folder.organization and not folder.sponsored_by:
            # Personal folder: the user's own allowance applies.
            if not request.user.link_creation_allowed():
                if request.user.nonpaying:
                    raise_invalid_capture_job(capture_job, "You've already reached your limit.")
                error = "Perma.cc cannot presently make additional Perma Links on your behalf. Visit your subscription settings page for more information."
                raise_invalid_capture_job(capture_job, error)
        else:
            # Sponsored or organization folder: the sponsor takes precedence
            # over the organization's registrar.
            registrar = folder.sponsored_by if folder.sponsored_by else folder.organization.registrar

            # Collect at most one message; if both checks fail, the
            # subscription message assigned second replaces the read-only one.
            msg = None
            if folder.read_only:
                registrar_users = [user.email for user in registrar.active_registrar_users()]
                msg = f"Your registrar has made this folder read-only. For assistance, contact: {', '.join(registrar_users)}."
            if not registrar.link_creation_allowed():
                error = 'Perma.cc cannot presently make links on behalf of {}. '.format(registrar.name)
                if request.user.registrar:
                    contact = 'Visit your settings for subscription information.'
                else:
                    registrar_users = [user.email for user in registrar.active_registrar_users()]
                    contact = 'For assistance with your subscription, contact:  {}.'.format(", ".join(registrar_users))
                msg = error + contact
            if msg:
                raise_invalid_capture_job(capture_job, msg)

        serializer = self.serializer_class(data=data, context={'request': request})
        if serializer.is_valid():

            with transaction.atomic():
                # Technique from https://github.com/harvard-lil/capstone/blob/0f7fb80f26e753e36e0c7a6a199b8fdccdd318be/capstone/capapi/serializers.py#L121
                #
                # Fetch the current user data here inside a transaction, using select_for_update
                # to lock the row so we don't collide with any simultaneous requests
                user = request.user.__class__.objects.select_for_update().get(pk=request.user.pk)

                # If this is a Personal Link, and if the user only has bonus links left, decrement bonus links
                bonus_link = False
                if not folder.organization and not folder.sponsored_by:
                    # get_links_remaining() is unpacked as a 3-tuple here; the
                    # middle value is not used.
                    links_remaining, _ , bonus_links = user.get_links_remaining()
                    if bonus_links and not links_remaining:
                        # (this works because it's part of the same transaction with the select_for_update --
                        # we don't have to use the same object)
                        request.user.bonus_links = bonus_links - 1
                        request.user.save(update_fields=['bonus_links'])
                        bonus_link = True

                # Saved inside the transaction, together with the bonus-link decrement.
                link = serializer.save(created_by=request.user, bonus_link=bonus_link)

            # put link in folder and handle Org settings based on folder
            if folder.organization and folder.organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(folder, request.user)  # also sets link.organization

            # handle uploaded file
            uploaded_file = request.data.get('file')
            if uploaded_file:
                link.write_uploaded_file(uploaded_file)

            # handle submitted url
            else:
                # create primary capture placeholder
                Capture(
                    link=link,
                    role='primary',
                    status='pending',
                    record_type='response',
                    url=link.submitted_url,
                ).save()

                # create screenshot placeholder
                Capture(
                    link=link,
                    role='screenshot',
                    status='pending',
                    record_type='resource',
                    url="file:///%s/cap.png" % link.guid,
                    content_type='image/png',
                ).save()


                # kick off capture tasks -- no need for guid since it'll work through the queue
                capture_job.status = 'pending'
                capture_job.link = link
                capture_job.save(update_fields=['status', 'link'])
                run_task(run_next_capture.s())

            return Response(serializer.data, status=status.HTTP_201_CREATED)

        # Reached only when the serializer rejected the payload.
        raise_invalid_capture_job(capture_job, serializer.errors)