Example #1
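# List the sub-categories of category: keep only members whose title
# starts with "Category:", strip that prefix, and write one name per
# line to indices_root/<category>.txt.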
def crawl(category):
    target_fp = indices_root / f'{category}.txt'
    site = moegirl()
    with open(target_fp, 'w') as f:
        for p in site.categories[category]:
            title = p.name
            if not title.startswith("Category:"):
                continue
            f.write(f"{title[len('Category:'):]}\n")
Example #2
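# List the regular pages in category: skip sub-category members and
# write one page title per line to indices_root/<category>.txt.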
def crawl(category):
    target_fp = indices_root / f'{category}.txt'
    site = moegirl()
    with open(target_fp, 'w') as f:
        for p in site.categories[category]:
            title = p.name
            if title.startswith("Category:"):
                continue
            print(category, title)
            f.write(title + "\n")
Example #3
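# Same crawl as above, but emit JSON Lines: each non-category member is
# wrapped in a PageIndex record and dumped as one JSON object per line.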
def crawl(category):
    target_fp = indices_root / f'{category}.txt'
    site = moegirl()
    with open(target_fp, 'w') as f:
        for p in site.categories[category]:
            title = p.name
            if title.startswith("Category:"):
                continue
            print(category, title)
            item = PageIndex(title=title, source=site.host)
            json.dump(class_to_json(item), f)
            f.write("\n")
Example #4
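# Parse the page wikitext with wikitextparser (wtp) and collect the
# template arguments whose names appear in tag_keys, plus the page's
# categories with the 9-character "Category:" prefix stripped.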
def get_info(title):
    site = moegirl()
    page = site.pages[title]
    tags = {}
    doc = wtp.parse(page.text())
    for temp in doc.templates:
        for arg in temp.arguments:
            k = arg.name.strip()
            if k in tag_keys:
                tags[k] = arg.value.strip()
    categories = [c.name[9:] for c in page.categories()]
    info = {'page_title': title, 'tags': tags, 'categories': categories}
    return info
Example #5
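# Build a CVInfo record: normalize the title to simplified Chinese
# (zh-CN), fetch the page wikitext, then extract gender, birth year and
# characters via the project's helper functions.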
def get_page(title) -> CVInfo:
    title = convert_zh(title, 'zh-CN')
    site = moegirl()
    page = site.pages[title]
    text = page.text()
    gender = get_gender(title, text)
    birth_year = get_birth_year(text)
    characters = get_characters(text, site.host)
    return CVInfo(name=title,
                  gender=gender,
                  birth_year=birth_year,
                  characters=characters,
                  source=site.host)
Example #6
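# Read a whitespace-separated list of category names from
# <ftype>_index.txt, crawl the member pages of each, and write one
# PageIndexWithTag JSON object per line, tagged with its category.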
def crawl(ftype):
    index_fp = indices_root / f'{ftype}_index.txt'
    target_fp = indices_root / f'{ftype}.txt'
    site = moegirl()
    categories = index_fp.read_text().split()
    with open(target_fp, 'w') as f:
        for cate in categories:
            for p in site.categories[cate]:
                title = p.name
                if title.startswith("Category:"):
                    continue
                print(ftype, cate, title)
                item = PageIndexWithTag(title=title,
                                        tag=cate,
                                        source=site.host)
                json.dump(class_to_json(item), f)
                f.write("\n")
Example #7
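# Unit test: run the shared test routine against a moegirl() site instance.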
def test_moegirl(self):
    self.run_test(moegirl())
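
All seven snippets lean on shared project context that is not shown here: indices_root, moegirl(), the PageIndex / PageIndexWithTag / CVInfo records, class_to_json, convert_zh, tag_keys and the get_* helpers, plus module-level imports of json and wikitextparser (as wtp). The Site, categories and pages calls follow the mwclient API. The sketch below is a guess at the minimum needed to run Example #3 end to end; the host name, the indices_root location and the PageIndex and class_to_json definitions are assumptions, not the project's actual code.

# Hypothetical shared context, reconstructed from the snippets above.
import json                      # Examples #3 and #6 dump JSON Lines
from dataclasses import asdict, dataclass
from pathlib import Path

import mwclient                  # provides Site, site.categories, site.pages


indices_root = Path('indices')   # assumed output directory


def moegirl():
    # Assumed host and API path; the real project may target another mirror.
    return mwclient.Site('zh.moegirl.org.cn', path='/')


@dataclass
class PageIndex:                 # shape inferred from Example #3
    title: str
    source: str


def class_to_json(obj):
    # Assumed helper: turn a dataclass instance into a JSON-serializable dict.
    return asdict(obj)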