def do_update_story(
    ctx: ActorContext,
    feed_id: T.int,
    offset: T.int,
    content: T.str,
    summary: T.str,
    has_mathjax: T.bool.optional,
    url: T.url,
):
    """Store fetched content for the story at (feed_id, offset).

    The stored story is looked up first; if it does not exist an error is
    logged and nothing is written. When ``content`` does not pass
    ``is_fulltext_content``, the update is only applied if ``is_summary``
    accepts the text of the stored story against the text of the fetched
    content; otherwise the fetch is logged as rejected and nothing is written.

    On acceptance the story's link, content, summary and has_mathjax are
    updated, then image detection is triggered.
    """
    existing = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    if not existing:
        LOG.error('story#%s,%s not found', feed_id, offset)
        return

    if not is_fulltext_content(content):
        # Compare plain-text renderings of the stored and fetched HTML.
        # NOTE(review): presumably is_summary checks that the feed text is a
        # summary of the fetched text -- confirm against its definition.
        feed_text = processor.story_html_to_text(existing.content)
        fetched_text = processor.story_html_to_text(content)
        if not is_summary(feed_text, fetched_text):
            LOG.info(
                'fetched story#%s,%s url=%r is not fulltext of feed story content',
                feed_id, offset, url,
            )
            return

    changes = {
        'link': url,
        'content': content,
        'summary': summary,
        'has_mathjax': has_mathjax,
    }
    STORY_SERVICE.update_story(feed_id, offset, changes)
    # Image detection receives the story object loaded before the update.
    _detect_story_images(ctx, existing)
def _update_story(
    story: CommonStory,
    story_content_info: StoryContentInfo,
    content: str,
    summary: str,
    url: str,
    has_mathjax: bool = None,
    sentence_count: int = None
) -> FulltextAcceptStrategy:
    """Apply fetched content to ``story`` according to the accept strategy.

    A ``StoryContentInfo`` is built from ``content`` and passed together with
    ``story_content_info`` to ``decide_accept_fulltext``:

    * ``REJECT``: the fetch is logged and nothing is written.
    * ``APPEND``: the fetched content is appended to the story's existing
      content, separated by an ``<hr/>`` line, and then written.
    * any other strategy: the fetched content is written as given.

    Returns the strategy that was decided.
    """
    fetched_info = StoryContentInfo(content)
    strategy = decide_accept_fulltext(fetched_info, story_content_info)

    if strategy == FulltextAcceptStrategy.REJECT:
        LOG.info(
            'fetched story#%s,%s url=%r is not fulltext of feed story content',
            story.feed_id, story.offset, url,
        )
        return strategy

    if strategy == FulltextAcceptStrategy.APPEND:
        # Missing (None/empty) parts are treated as empty strings.
        content = '\n<hr/>\n'.join([story.content or '', content or ''])

    changes = {
        'link': url,
        'content': content,
        'summary': summary,
        'has_mathjax': has_mathjax,
        'sentence_count': sentence_count,
    }
    STORY_SERVICE.update_story(story.feed_id, story.offset, changes)
    return strategy
def test_update_story(self):
    """Save 20 storys, then update the story at offset 10 twice.

    The first update writes content, summary and dt_published; the second
    writes content only.
    """
    first_twenty = self.storys[:20]
    saved = STORY_SERVICE.bulk_save_by_feed(
        self.feed_id, first_twenty, batch_size=10)
    self.assertEqual(len(saved), 20)
    self.assert_feed_total_storys(20)
    self.assert_total_story_infos(20)

    updated_story = self.updated_storys[10]
    full_update = {}
    for field in ('content', 'summary', 'dt_published'):
        full_update[field] = updated_story[field]
    STORY_SERVICE.update_story(self.feed_id, 10, full_update)

    # Built after the first call, so it reads full_update as that call left it.
    content_only = {'content': full_update['content']}
    STORY_SERVICE.update_story(self.feed_id, 10, content_only)
def _replace_story_images(feed_id, offset):
    """Rewrite the image urls in the story at (feed_id, offset).

    The story content is parsed for images and the image urls are
    batch-detected. Every url whose status is in
    ``IMAGE_REFERER_DENY_STATUS`` is mapped to an ``/api/v1/image/...`` url,
    and the processed content is written back to the story.

    Nothing is written when the story does not exist or when its content
    yields no image urls.
    """
    story = STORY_SERVICE.get_by_offset(feed_id, offset, detail=True)
    if not story:
        # get_by_offset may find nothing; without this guard the attribute
        # access below raises AttributeError.
        LOG.error('story#%s,%s not found', feed_id, offset)
        return
    image_processor = StoryImageProcessor(story.link, story.content)
    image_indexs = image_processor.parse()
    image_urls = _image_urls_of_indexs(image_indexs)
    if not image_urls:
        return
    image_statuses = ImageInfo.batch_detect_images(image_urls)
    # Map each referer-denied image url to its replacement url.
    image_replaces = {
        url: '/api/v1/image/{}?{}'.format(
            encode_image_url(url, story.link), RSSANT_IMAGE_TAG)
        for url, status in image_statuses.items()
        if status in IMAGE_REFERER_DENY_STATUS
    }
    LOG.info(f'story#{feed_id},{offset} {story.link} '
             f'replace {len(image_replaces)} referer deny images')
    # image_processor.process will (1) fix relative url (2) replace image url
    # call image_processor.process regardless of image_replaces is empty or not
    content = image_processor.process(image_indexs, image_replaces)
    STORY_SERVICE.update_story(feed_id, offset, {'content': content})