Beispiel #1
0
 def get_lemma_str_from_cat(self, category: str) -> List[str]:
     """Return a cleaned-up string for every page found in ``category``.

     Each page yielded by ``CategorizedPageGenerator`` for the category on
     ``self.wiki`` is converted with ``str()``, has any leading and trailing
     square brackets stripped, and then loses its first two characters.
     """
     members = CategorizedPageGenerator(Category(self.wiki, category))
     lemma_names = []
     for member in members:
         unbracketed = str(member).strip("[]")
         # NOTE(review): the fixed two-character cut presumably removes a
         # site/language prefix from the link text -- confirm against the
         # wiki this runs on.
         lemma_names.append(unbracketed[2:])
     return lemma_names
    def test_subpage_filter(self):
        """Check SubpageFilterGenerator output for the limits 0 and 3.

        For each limit a fresh generator over the 'Subpage testing' category
        is wrapped in ``SubpageFilterGenerator`` and the resulting page
        titles are compared with the expected ones.
        """
        site = self.get_site()
        test_cat = pywikibot.Category(site, 'Subpage testing')

        # (limit passed to SubpageFilterGenerator, expected page titles)
        cases = (
            (0, ('/home/test', )),
            (3, (
                '/home/test',
                'User:Sn1per/ProtectTest1/test',
                'User:Sn1per/ProtectTest1/test/test',
                'User:Sn1per/sandbox',
            )),
        )

        for limit, expected_titles in cases:
            filtered = pagegenerators.SubpageFilterGenerator(
                CategorizedPageGenerator(test_cat), limit)
            self.assertPagelistTitles(filtered, titles=expected_titles,
                                      site=site)
Beispiel #3
0
def load_files(categories, depth):
    """
    Collect the distinct file titles found in the given categories.

    Every category name is looked up on the module-level ``commons`` site and
    its members in the file namespace are gathered, following subcategories
    according to ``depth``. Titles are stored without their namespace prefix,
    and a title that occurs in several categories is reported once. The order
    of the returned list is not defined, because it is built from a set.

    @param categories: List of Commons category names as strings
    @type categories: list
    @param depth: Category recursion depth
    @type depth: int
    @return: Unique file titles without namespace prefix
    @rtype: list
    """
    unique_titles = set()
    for category_name in categories:
        members = CategorizedPageGenerator(Category(commons, category_name),
                                           recurse=depth,
                                           namespaces=Namespace.FILE)
        # NOTE(review): newer Pywikibot releases spell this keyword
        # ``with_ns`` -- confirm against the installed version.
        unique_titles.update(
            member.title(withNamespace=False) for member in members
        )

    return list(unique_titles)
import hashlib
import uuid
import json

# Module-level setup: the site object and the category whose members are
# iterated further down in this script.
site = Site('commons', 'commons')
cat = Category(site, 'Category:Media_contributed_by_the_Swedish_Performing_Arts_Agency:_2019-03')
# Blocks on an interactive prompt; whatever the user types is handed straight
# to GoogleTranslate (presumably the path to a service-account credentials
# file -- confirm against the GoogleTranslate implementation).
translate = GoogleTranslate(input('google service account file:'))

def thumb_from_title(title):
    """Build the upload.wikimedia.org URL of a file's 500px thumbnail.

    The URL contains, in order: the first hex digit of the MD5 digest of the
    UTF-8 encoded title, the first two hex digits of that digest, the title
    itself, and a thumbnail name of the form ``500px-<title>.jpg``.

    Spaces in ``title`` are replaced by underscores before hashing and
    formatting. The hashed upload directories are derived from the underscore
    form of the file name, so hashing a title with spaces would point to the
    wrong directory and leave literal spaces in the URL. Titles that already
    use underscores produce the same result as before.

    :param title: file title (presumably without the ``File:`` namespace
        prefix -- confirm against the caller)
    :return: the thumbnail URL as a string
    """
    normalized = title.replace(' ', '_')
    digest = hashlib.md5(normalized.encode('utf-8')).hexdigest()

    # NOTE(review): ".jpg" is always appended, so a title that already ends in
    # ".jpg" yields "....jpg.jpg" -- confirm this matches the files processed.
    return 'https://upload.wikimedia.org/wikipedia/commons/thumb/{}/{}/{}/500px-{}.jpg'.format(
        digest[:1], digest[:2], normalized, normalized)

# Accumulator for the per-page results; nothing in the lines shown here
# appends to it yet.
final_pages = list()
# Walk the direct members of `cat` (no recursion into subcategories),
# restricted to namespace 6 (presumably the File namespace -- confirm).
for page in CategorizedPageGenerator(cat, recurse=False, namespaces=6):
    wikicode = mwparserfromhell.parse(page.text)

    # Look for the 'Musikverket-image' template in the page's wikitext.
    # There is no break, so if several templates match, the last one is kept.
    template_to_parse = False
    for template in wikicode.filter_templates():
        if template.name.matches('Musikverket-image'):
            template_to_parse = template

    # Pages without the template are reported and skipped.
    if not template_to_parse:
        print('failed to find given template')
        continue

    page_data = {}

    # The page id is stored as a string rather than as a number.
    page_data['media_id'] = '{}'.format(page.pageid)
    # Re-parse the matched template and take the value of its 'ID' parameter
    # with leading whitespace removed.
    page_data['local_id'] = mwparserfromhell.parse(template_to_parse).filter_templates()[0].get('ID').value.lstrip()