def convert_docs_to_blogs(
    docs: List[Document]
) -> Dict[ObjectId, JpnArticleBlog]:
    """Build blog objects from MongoDB BSON blog documents.

    Args:
        docs: MongoDB BSON documents to convert to blog objects.

    Returns:
        A dictionary keyed by each document's '_id' value whose value is
        the blog object created from that document.
    """
    blogs_by_oid = {}
    for blog_doc in docs:
        # 'catchphrase', 'introduction' and 'last_crawled_datetime' are read
        # with .get(), so a document lacking them yields None for that
        # field; every other key is accessed directly and must be present.
        blog = JpnArticleBlog(
            title=blog_doc['title'],
            author=blog_doc['author'],
            source_name=blog_doc['source_name'],
            source_url=blog_doc['source_url'],
            publication_datetime=blog_doc['publication_datetime'],
            last_updated_datetime=blog_doc['last_updated_datetime'],
            rating=utils.float_or_none(blog_doc['rating']),
            rating_count=utils.int_or_none(blog_doc['rating_count']),
            tags=blog_doc['tags'],
            catchphrase=blog_doc.get('catchphrase'),
            introduction=blog_doc.get('introduction'),
            article_count=utils.int_or_none(blog_doc['article_count']),
            total_char_count=utils.int_or_none(
                blog_doc['total_char_count']
            ),
            comment_count=utils.int_or_none(blog_doc['comment_count']),
            follower_count=utils.int_or_none(blog_doc['follower_count']),
            in_serialization=blog_doc['in_serialization'],
            last_crawled_datetime=blog_doc.get('last_crawled_datetime'),
        )
        blogs_by_oid[blog_doc['_id']] = blog

    return blogs_by_oid
def convert_docs_to_found_positions(
    docs: List[Document]
) -> List[ArticleTextPosition]:
    """Build article text positions from MongoDB BSON documents.

    Args:
        docs: MongoDB BSON documents, each providing an 'index' and a 'len'
            entry.

    Returns:
        One text position per document, in the same order as the given
        documents. Each document's 'index' value becomes the position's
        start and its 'len' value becomes the position's len, both passed
        through utils.int_or_none.
    """
    return [
        ArticleTextPosition(
            start=utils.int_or_none(position_doc['index']),
            len=utils.int_or_none(position_doc['len']),
        )
        for position_doc in docs
    ]
def convert_docs_to_search_results(
    docs: List[Document], oid_article_map: Dict[ObjectId, JpnArticle]
) -> List[SearchResult]:
    """Build article search results from MongoDB BSON documents.

    Args:
        docs: MongoDB BSON documents to convert to search result objects.
        oid_article_map: A mapping from articles' MongoDB ObjectIds to the
            article objects holding the data for those articles. It must
            have an entry for every 'article_oid' referenced by the given
            documents; a missing entry raises KeyError.

    Returns:
        A list with one search result object per given document, in the
        same order as the documents.
    """
    def build_result(result_doc):
        # Convert the nested position documents before looking anything
        # else up in the search result document.
        positions = convert_docs_to_found_positions(
            result_doc['found_positions']
        )
        return SearchResult(
            article=oid_article_map[result_doc['article_oid']],
            matched_base_forms=result_doc['matched_base_forms'],
            found_positions=positions,
            quality_score=utils.int_or_none(result_doc['quality_score']),
        )

    return [build_result(result_doc) for result_doc in docs]
def convert_docs_to_articles(
    docs: List[Document], oid_blog_map: Dict[ObjectId, JpnArticleBlog]
) -> Dict[ObjectId, JpnArticle]:
    """Build article objects from MongoDB BSON article documents.

    Args:
        docs: MongoDB BSON documents to convert to article objects.
        oid_blog_map: A mapping from blogs' MongoDB ObjectIds to the blog
            objects holding the data for those blogs. A document whose
            'blog_oid' value is not a key of this mapping produces an
            article whose blog is None.

    Returns:
        A dictionary keyed by each document's '_id' value whose value is
        the article object created from that document.
    """
    articles_by_oid = {}
    for article_doc in docs:
        # Only 'author' is read with .get() (None when absent); every other
        # key is accessed directly and must be present in the document.
        article = JpnArticle(
            title=article_doc['title'],
            author=article_doc.get('author'),
            source_url=article_doc['source_url'],
            source_name=article_doc['source_name'],
            full_text=article_doc['full_text'],
            alnum_count=utils.int_or_none(article_doc['alnum_count']),
            has_video=article_doc['has_video'],
            tags=article_doc['tags'],
            blog=oid_blog_map.get(article_doc['blog_oid']),
            blog_article_order_num=utils.int_or_none(
                article_doc['blog_article_order_num']
            ),
            blog_section_name=article_doc['blog_section_name'],
            blog_section_order_num=utils.int_or_none(
                article_doc['blog_section_order_num']
            ),
            blog_section_article_order_num=utils.int_or_none(
                article_doc['blog_section_article_order_num']
            ),
            publication_datetime=article_doc['publication_datetime'],
            last_updated_datetime=article_doc['last_updated_datetime'],
            last_crawled_datetime=article_doc['last_crawled_datetime'],
            # The article keeps its database id as a string, while the
            # returned mapping is keyed by the raw '_id' value.
            database_id=str(article_doc['_id']),
            quality_score=utils.int_or_none(article_doc['quality_score']),
        )
        articles_by_oid[article_doc['_id']] = article

    return articles_by_oid
def convert_docs_to_found_lexical_items(
    docs: List[Document], oid_article_map: Dict[ObjectId, JpnArticle]
) -> List[FoundJpnLexicalItem]:
    """Convert MongoDB BSON documents to found lexical items.

    The given documents are only read; they are never modified.

    Args:
        docs: MongoDB BSON documents to convert to found lexical item
            objects.
        oid_article_map: A mapping from articles' MongoDB ObjectIds to
            article objects with the data for those articles. Must contain
            entries for all of the articles referenced in the given found
            lexical item documents; a missing entry raises KeyError.

    Returns:
        A list of found lexical item objects converted from the given
        documents, in the same order as the documents.
    """
    found_lexical_items = []
    for doc in docs:
        interps = convert_docs_to_lexical_item_interps(doc['possible_interps'])
        found_positions = convert_docs_to_found_positions(
            doc['found_positions']
        )

        # A stored None is treated as "no per-interpretation positions".
        # Fall back to a local empty dict rather than writing it back into
        # the caller's document.
        stored_interp_position_map = doc['interp_position_map']
        if stored_interp_position_map is None:
            stored_interp_position_map = {}

        # Each key of the stored map is converted with int() and used as an
        # index into interps, so the resulting map is keyed by the
        # interpretation object itself.
        # NOTE(review): keys are presumably stringified indices because BSON
        # document keys are strings -- confirm against the writer side.
        interp_position_map = {}
        for interp_index, position_docs in stored_interp_position_map.items():
            interp_positions = convert_docs_to_found_positions(position_docs)
            interp_position_map[interps[int(interp_index)]] = interp_positions

        found_lexical_items.append(FoundJpnLexicalItem(
            base_form=doc['base_form'],
            article=oid_article_map[doc['article_oid']],
            found_positions=found_positions,
            possible_interps=interps,
            interp_position_map=interp_position_map,
            # The constructor's quality_score_mod is filled from the
            # document's 'quality_score_exact_mod' field.
            quality_score_mod=utils.int_or_none(
                doc['quality_score_exact_mod']
            ),
            database_id=str(doc['_id']),
        ))

    return found_lexical_items