Code example #1
def do_update_story_images(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    story_url: T.url,
    images: T.list(T.dict(
        url=T.url,
        status=T.int,
    ))
):
    # save image info
    url_root_status = {}
    for img in images:
        url_root = ImageInfo.extract_url_root(img['url'])
        value = (img['status'], img['url'])
        if url_root in url_root_status:
            url_root_status[url_root] = max(value, url_root_status[url_root])
        else:
            url_root_status[url_root] = value
    with transaction.atomic():
        image_info_objects = []
        for url_root, (status, url) in url_root_status.items():
            image_info_objects.append(ImageInfo(
                url_root=url_root,
                sample_url=url,
                referer=story_url,
                status_code=status,
            ))
        LOG.info(f'bulk create {len(image_info_objects)} ImageInfo objects')
        ImageInfo.objects.bulk_create(image_info_objects)
    _replace_story_images(feed_id, offset)
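The loop above collapses the incoming image list to one entry per url_root, keeping the tuple-wise maximum of (status, url) so the highest status code (and, on ties, the lexicographically larger url) is the one persisted. A minimal standalone sketch of that reduction; the sample data is made up and _url_root is only an illustrative stand-in for ImageInfo.extract_url_root:

from urllib.parse import urlsplit

def _url_root(url):
    # illustrative stand-in for ImageInfo.extract_url_root: scheme + host only
    parts = urlsplit(url)
    return f'{parts.scheme}://{parts.netloc}'

images = [
    {'url': 'https://img.example.com/a.jpg', 'status': 200},
    {'url': 'https://img.example.com/b.jpg', 'status': 403},
    {'url': 'https://cdn.example.org/c.png', 'status': 200},
]
url_root_status = {}
for img in images:
    root = _url_root(img['url'])
    value = (img['status'], img['url'])
    url_root_status[root] = max(value, url_root_status.get(root, value))
# each root now maps to its highest (status, url) pair, e.g.
# 'https://img.example.com' -> (403, 'https://img.example.com/b.jpg')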
Code example #2
def _detect_story_images(ctx, story):
    image_processor = StoryImageProcessor(story.link, story.content)
    image_urls = _image_urls_of_indexs(image_processor.parse())
    if not image_urls:
        return
    image_statuses = ImageInfo.batch_detect_images(image_urls)
    num_todo_image_urls = 0
    todo_url_roots = defaultdict(list)
    for url in image_urls:
        status = image_statuses.get(url)
        if status is None:
            num_todo_image_urls += 1
            url_root = ImageInfo.extract_url_root(url)
            todo_url_roots[url_root].append(url)
    LOG.info(
        f'story#{story.feed_id},{story.offset} {story.link} has {len(image_urls)} images, '
        f'need detect {num_todo_image_urls} images '
        f'from {len(todo_url_roots)} url_roots'
    )
    if todo_url_roots:
        todo_urls = []
        for items in todo_url_roots.values():
            if len(items) > 3:
                todo_urls.extend(random.sample(items, 3))
            else:
                todo_urls.extend(items)
        ctx.hope('worker_rss.detect_story_images', dict(
            feed_id=story.feed_id,
            offset=story.offset,
            story_url=story.link,
            image_urls=list(set(todo_urls)),
        ))
    else:
        _replace_story_images(feed_id=story.feed_id, offset=story.offset)
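The dispatch branch above caps detection at three sampled URLs per url_root; the per-root status stored by do_update_story_images in example #1 then applies to the remaining URLs sharing that root. A small illustration of the sampling step, using made-up data:

import random
from collections import defaultdict

# hypothetical url_root -> pending urls mapping
todo_url_roots = defaultdict(list, {
    'https://img.example.com': [f'https://img.example.com/{i}.jpg' for i in range(10)],
    'https://cdn.example.org': ['https://cdn.example.org/x.png'],
})
todo_urls = []
for items in todo_url_roots.values():
    # sample at most 3 urls per root to bound the number of detection requests
    todo_urls.extend(random.sample(items, 3) if len(items) > 3 else items)
print(sorted(set(todo_urls)))  # 3 urls sampled from img.example.com plus the single cdn url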
Code example #3
def _replace_story_images(feed_id, offset):
    story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    image_processor = StoryImageProcessor(story.link, story.content)
    image_indexs = image_processor.parse()
    image_urls = _image_urls_of_indexs(image_indexs)
    if not image_urls:
        return
    image_statuses = ImageInfo.batch_detect_images(image_urls)
    image_replaces = {}
    for url, status in image_statuses.items():
        if status in IMAGE_REFERER_DENY_STATUS:
            new_url_data = encode_image_url(url, story.link)
            image_replaces[url] = '/api/v1/image/{}?{}'.format(new_url_data, RSSANT_IMAGE_TAG)
    LOG.info(f'story#{feed_id},{offset} {story.link} '
             f'replace {len(image_replaces)} referer deny images')
    # image_processor.process will (1) fix relative urls and (2) replace image urls
    # call image_processor.process regardless of whether image_replaces is empty
    content = image_processor.process(image_indexs, image_replaces)
    STORY_SERVICE.update_story(feed_id, offset, {'content': content})
Code example #4
def _replace_story_images(story_id):
    story = Story.objects.get(pk=story_id)
    image_processor = StoryImageProcessor(story.link, story.content)
    image_indexs = image_processor.parse()
    image_urls = _image_urls_of_indexs(image_indexs)
    if not image_urls:
        return
    image_statuses = ImageInfo.batch_detect_images(image_urls)
    image_replaces = {}
    for url, status in image_statuses.items():
        if status in IMAGE_REFERER_DENY_STATUS:
            new_url_data = encode_image_url(url, story.link)
            image_replaces[url] = '/api/v1/image/{}?{}'.format(new_url_data, RSSANT_IMAGE_TAG)
    LOG.info(f'story#{story_id} {story.link} '
             f'replace {len(image_replaces)} referer deny images')
    # image_processor.process will (1) fix relative urls and (2) replace image urls
    # call image_processor.process regardless of whether image_replaces is empty
    content = image_processor.process(image_indexs, image_replaces)
    story.content = content
    story.save()
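Examples #3 and #4 are two variants of the same routine: #3 reads and writes the story through STORY_SERVICE by (feed_id, offset), while #4 loads and saves a Django Story row directly by primary key. In both, only URLs whose detected status falls in IMAGE_REFERER_DENY_STATUS are rewritten to the image proxy endpoint. A minimal sketch of that rewrite with hypothetical statuses; encode_image_url and RSSANT_IMAGE_TAG are project internals and are stubbed here with placeholders:

from urllib.parse import quote

IMAGE_REFERER_DENY_STATUS = {401, 403}  # assumed: statuses treated as referer-deny
RSSANT_IMAGE_TAG = 'rssant=1'           # placeholder for the real query tag

def encode_image_url(url, referer):
    # placeholder encoder; the real one presumably encodes/signs url + referer
    return quote('{}|{}'.format(url, referer), safe='')

story_link = 'https://blog.example.com/post/1'
image_statuses = {
    'https://img.example.com/a.jpg': 200,
    'https://img.example.com/b.jpg': 403,
}
image_replaces = {}
for url, status in image_statuses.items():
    if status in IMAGE_REFERER_DENY_STATUS:
        new_url_data = encode_image_url(url, story_link)
        image_replaces[url] = '/api/v1/image/{}?{}'.format(new_url_data, RSSANT_IMAGE_TAG)
# only b.jpg gets routed through the /api/v1/image/ proxy endpoint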
Code example #5
def do_clean_image_info_by_retention(ctx: ActorContext):
    num_rows = ImageInfo.delete_by_retention()
    LOG.info('delete {} outdated imageinfos'.format(num_rows))
Code example #6
File: rss.py  Project: lightcax/rssant
def do_clean_image_info_by_retention(ctx: ActorContext):
    if not CONFIG.detect_story_image_enable:
        return
    num_rows = ImageInfo.delete_by_retention()
    LOG.info('delete {} outdated imageinfos'.format(num_rows))
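Both cleanup actors delegate to ImageInfo.delete_by_retention and log how many rows were removed; example #6 additionally skips the work when detect_story_image_enable is off. The method itself is not shown on this page; the following is only a hedged Django-style sketch of what such a retention delete could look like, with the model fields, the dt_created name, and the retention constant all being assumptions rather than the project's actual schema:

from datetime import timedelta

from django.db import models
from django.utils import timezone

IMAGE_INFO_RETENTION_DAYS = 30  # assumed retention window


class ImageInfo(models.Model):
    # minimal sketch, not the project's actual schema
    url_root = models.CharField(max_length=255)
    status_code = models.IntegerField()
    dt_created = models.DateTimeField(auto_now_add=True)

    @classmethod
    def delete_by_retention(cls):
        # delete rows created before the retention deadline, return the row count
        deadline = timezone.now() - timedelta(days=IMAGE_INFO_RETENTION_DAYS)
        num_rows, _ = cls.objects.filter(dt_created__lt=deadline).delete()
        return num_rows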