Exemple #1
0
def convert_docs_to_blogs(
    docs: List[Document]
) -> Dict[ObjectId, JpnArticleBlog]:
    """Build blog objects from MongoDB BSON blog documents.

    Rating and count fields are passed through ``utils.float_or_none`` /
    ``utils.int_or_none`` before being handed to the blog constructor. The
    ``catchphrase``, ``introduction`` and ``last_crawled_datetime`` fields
    are read with ``get`` and may therefore be absent from a document; every
    other field is read by key and must be present.

    Args:
        docs: MongoDB BSON blog documents to convert to blog objects.

    Returns:
        A mapping from each blog document's MongoDB ObjectId to the blog
        object created from that document.
    """
    def build_blog(blog_doc: Document) -> JpnArticleBlog:
        """Create the blog object for a single blog document."""
        return JpnArticleBlog(
            title=blog_doc['title'],
            author=blog_doc['author'],
            source_name=blog_doc['source_name'],
            source_url=blog_doc['source_url'],
            publication_datetime=blog_doc['publication_datetime'],
            last_updated_datetime=blog_doc['last_updated_datetime'],
            rating=utils.float_or_none(blog_doc['rating']),
            rating_count=utils.int_or_none(blog_doc['rating_count']),
            tags=blog_doc['tags'],
            catchphrase=blog_doc.get('catchphrase'),
            introduction=blog_doc.get('introduction'),
            article_count=utils.int_or_none(blog_doc['article_count']),
            total_char_count=utils.int_or_none(
                blog_doc['total_char_count']
            ),
            comment_count=utils.int_or_none(blog_doc['comment_count']),
            follower_count=utils.int_or_none(blog_doc['follower_count']),
            in_serialization=blog_doc['in_serialization'],
            last_crawled_datetime=blog_doc.get('last_crawled_datetime'),
        )

    blogs_by_oid = {}
    for blog_doc in docs:
        # Build the blog before reading the id so that fields are accessed
        # in the same order for every document.
        blog = build_blog(blog_doc)
        blogs_by_oid[blog_doc['_id']] = blog

    return blogs_by_oid
Exemple #2
0
def convert_docs_to_found_positions(
    docs: List[Document]
) -> List[ArticleTextPosition]:
    """Build found position objects from MongoDB BSON documents.

    Each document's ``index`` field becomes the position's ``start`` and its
    ``len`` field becomes the position's ``len``; both are passed through
    ``utils.int_or_none`` first.

    Args:
        docs: MongoDB BSON documents, each with ``index`` and ``len`` keys.

    Returns:
        A list of position objects in the same order as the given documents.
    """
    return [
        ArticleTextPosition(
            start=utils.int_or_none(position_doc['index']),
            len=utils.int_or_none(position_doc['len']),
        )
        for position_doc in docs
    ]
Exemple #3
0
def convert_docs_to_search_results(
    docs: List[Document], oid_article_map: Dict[ObjectId, JpnArticle]
) -> List[SearchResult]:
    """Build article search result objects from MongoDB BSON documents.

    Args:
        docs: MongoDB BSON documents to convert to search result objects.
        oid_article_map: A mapping from articles' MongoDB ObjectIds to article
            objects with the data for those articles.
            Must contain entries for all of the articles referenced in the
            given search result documents.

    Returns:
        A list of search result objects converted from the given documents,
        in the same order as the documents.

    Raises:
        KeyError: If a document references an article ObjectId that has no
            entry in oid_article_map.
    """
    def build_search_result(result_doc: Document) -> SearchResult:
        """Create the search result object for a single document."""
        positions = convert_docs_to_found_positions(
            result_doc['found_positions']
        )
        return SearchResult(
            article=oid_article_map[result_doc['article_oid']],
            matched_base_forms=result_doc['matched_base_forms'],
            found_positions=positions,
            quality_score=utils.int_or_none(result_doc['quality_score']),
        )

    return [build_search_result(result_doc) for result_doc in docs]
Exemple #4
0
def convert_docs_to_articles(
    docs: List[Document], oid_blog_map: Dict[ObjectId, JpnArticleBlog]
) -> Dict[ObjectId, JpnArticle]:
    """Build article objects from MongoDB BSON article documents.

    Integer-like fields are passed through ``utils.int_or_none`` before being
    handed to the article constructor. ``author`` is read with ``get`` and
    may be absent from a document; every other field is read by key and must
    be present.

    Args:
        docs: MongoDB BSON documents to convert to article objects.
        oid_blog_map: A mapping from blogs' MongoDB ObjectIds to blog objects
            with the data for those blogs.
            Should contain entries for all of the blogs referenced in the
            given article documents. The lookup uses ``get``, so a
            ``blog_oid`` with no entry in the mapping results in an article
            whose blog is None rather than an error.

    Returns:
        A mapping from each article document's MongoDB ObjectId to the
        article object created from that document.
    """
    def build_article(article_doc: Document) -> JpnArticle:
        """Create the article object for a single article document."""
        return JpnArticle(
            title=article_doc['title'],
            author=article_doc.get('author'),
            source_url=article_doc['source_url'],
            source_name=article_doc['source_name'],
            full_text=article_doc['full_text'],
            alnum_count=utils.int_or_none(article_doc['alnum_count']),
            has_video=article_doc['has_video'],
            tags=article_doc['tags'],
            blog=oid_blog_map.get(article_doc['blog_oid']),
            blog_article_order_num=utils.int_or_none(
                article_doc['blog_article_order_num']
            ),
            blog_section_name=article_doc['blog_section_name'],
            blog_section_order_num=utils.int_or_none(
                article_doc['blog_section_order_num']
            ),
            blog_section_article_order_num=utils.int_or_none(
                article_doc['blog_section_article_order_num']
            ),
            publication_datetime=article_doc['publication_datetime'],
            last_updated_datetime=article_doc['last_updated_datetime'],
            last_crawled_datetime=article_doc['last_crawled_datetime'],
            # The ObjectId is also stored on the article in string form.
            database_id=str(article_doc['_id']),
            quality_score=utils.int_or_none(article_doc['quality_score']),
        )

    articles_by_oid = {}
    for article_doc in docs:
        article = build_article(article_doc)
        articles_by_oid[article_doc['_id']] = article

    return articles_by_oid
Exemple #5
0
def convert_docs_to_found_lexical_items(
    docs: List[Document], oid_article_map: Dict[ObjectId, JpnArticle]
) -> List[FoundJpnLexicalItem]:
    """Convert MongoDB BSON documents to found lexical items.

    The given documents are only read; they are never modified.

    Args:
        docs: MongoDB BSON documents to convert to found lexical item objects.
        oid_article_map: A mapping from articles' MongoDB ObjectIds to article
            objects with the data for those articles.
            Must contain entries for all of the articles referenced in the
            given found lexical item documents.

    Returns:
        A list of found lexical item objects converted from the given
        documents, in the same order as the documents.

    Raises:
        KeyError: If a document references an article ObjectId that has no
            entry in oid_article_map.
    """
    found_lexical_items = []
    for doc in docs:
        interps = convert_docs_to_lexical_item_interps(doc['possible_interps'])
        found_positions = convert_docs_to_found_positions(
            doc['found_positions']
        )

        # A stored None means there are no interp-specific positions. Use a
        # local empty mapping instead of writing one back into the document
        # so the caller's data is left untouched.
        stored_position_map = doc['interp_position_map']
        if stored_position_map is None:
            stored_position_map = {}

        # The stored keys are indexes into the possible interps list (they
        # are converted with int() before indexing), so map each one back to
        # its interp object.
        interp_position_map = {}
        for interp_index, position_docs in stored_position_map.items():
            interp_positions = convert_docs_to_found_positions(position_docs)
            interp_position_map[interps[int(interp_index)]] = interp_positions

        found_lexical_items.append(FoundJpnLexicalItem(
            base_form=doc['base_form'],
            article=oid_article_map[doc['article_oid']],
            found_positions=found_positions,
            possible_interps=interps,
            interp_position_map=interp_position_map,
            quality_score_mod=utils.int_or_none(
                doc['quality_score_exact_mod']
            ),
            database_id=str(doc['_id']),
        ))

    return found_lexical_items