Python get_article_from_gzip_json Examples

Programming Language: Python

Namespace/Package Name: q20

Method/Function: get_article_from_gzip_json

Examples at hotexamples.com: 3

Python get_article_from_gzip_json - 3 examples found. These are the top-rated real-world Python examples of q20.get_article_from_gzip_json extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: test_chapter3.py Project: korham/Knock100

 def test_q20_no_result(self):
     """Looking up a title absent from the gzip file must yield ``None``.

     Three JSON records (titles タイトル１ to タイトル３), one per line, are
     written to ``test.json.gz`` under ``self.td.name`` (presumably a
     temporary directory created by the fixture; confirm in setUp).
     The lookup then asks for タイトル４, which is not among them.
     """
     records = (
         r'{"text": "１について\n記事本文です。", "title": "タイトル１"}',
         r'{"text": "２について\n記事本文である。", "title": "タイトル２"}',
         r'{"text": "３について\n記事本文だよ。", "title": "タイトル３"}',
     )
     gz_path = os.path.join(self.td.name, "test.json.gz")
     with gzip.open(gz_path, mode="wt", encoding="utf-8") as stream:
         for record in records:
             # One JSON document per line.
             stream.write(record + "\n")
     self.assertIsNone(q20.get_article_from_gzip_json(gz_path, "タイトル４"))

Example #2

Show file

File: q21.py Project: korham/Knock100

# 21. カテゴリ名を含む行を抽出

# 記事中でカテゴリ名を宣言している行を抽出せよ．

import os, re

import q20


def pick_category_rows(article):
    """Return the category declarations found in *article*.

    A declaration is a ``[[Category:...]]`` link that is immediately
    followed by a line break or by the end of the text.

    Args:
        article: The article text to scan.

    Returns:
        A list of the matched substrings in order of appearance. Each one
        keeps the newline that follows it; a declaration that ends the
        text without a trailing newline is returned without one.
    """
    # ``\Z`` accepts the end of the text as a terminator, so a declaration
    # on a final line that lacks a trailing newline is not dropped.
    return re.findall(r"\[\[Category:.*?\]\](?:\n|\Z)", article)


if __name__ == "__main__":
    # Pass each path component separately so os.path.join picks the
    # separator; a literal "..\DataSource\..." only resolves on Windows.
    file = os.path.join(os.path.dirname(__file__),
                        "..", "DataSource", "jawiki-country.json.gz")
    # Fetch the article titled "イギリス" and print its category declarations.
    article = q20.get_article_from_gzip_json(file, "イギリス")
    result = pick_category_rows(article)
    print("".join(result))

Example #3

Show file

File: q23.py Project: korham/Knock100

# 23. セクション構造

# 記事中に含まれるセクション名とそのレベル（例えば"== セクション名 =="なら1）を表示せよ．

# wikipediaのセクションレベル？
# 行頭に等号。数が増えるごとに1階層下がる（等号一つは見出し）
# == セクションの見出し ==
# === サブセクションの見出し ===
# ==== サブサブセクションの見出し ====

import os, re
import q20

def pick_sections(article):
    """Collect the section headings of *article* with their levels.

    A heading is a line that starts with two or more equals signs and ends
    with two or more equals signs. Its level is the number of leading
    equals signs minus one, so ``== name ==`` is level 1.

    Args:
        article: The article text to scan.

    Returns:
        A list of ``(section_name, level)`` tuples in order of appearance,
        with surrounding whitespace stripped from the name.
    """
    # MULTILINE makes ^ and $ anchor at every line rather than only at the
    # ends of the whole text.
    heading = re.compile(r"^(?P<level>={2,})(?P<section>.*?)={2,}$",
                         flags=re.MULTILINE)
    return [
        (hit.group("section").strip(), len(hit.group("level")) - 1)
        for hit in heading.finditer(article)
    ]

if __name__ == "__main__":
    # Pass each path component separately so os.path.join picks the
    # separator; a literal "..\DataSource\..." only resolves on Windows.
    file = os.path.join(os.path.dirname(__file__),
                        "..", "DataSource", "jawiki-country.json.gz")
    # Fetch the article titled "日本" and print one (section, level) tuple
    # per line.
    article = q20.get_article_from_gzip_json(file, "日本")
    result = pick_sections(article)
    for i in result:
        print(i)