Ejemplo n.º 1
0
 def receive(self, signal, page):
     word = page.title
     if signal == 'page_deleted':
         pass  # print '@ inflection removed'
         WordInflection.objects.filter(word=word).delete()
     elif signal == 'page_content_changed':
         pass  # print '@ inflection changes check'
         old_items = WordInflection.objects.filter(word=word)
         old_tpls = [item.content for item in old_items]
         morph = u'сущ'
         lang = u'ru'
         parts = get_inflection_templates(page.content,
                                          morph, lang)
         new_tpls = [part['tpl'] for part in parts]
         if len(old_tpls) == len(new_tpls) and set(old_tpls) == set(new_tpls):
             pass  # print 'same'
         else:
             pass  # print 'different'
             WordInflection.objects.filter(word=word).delete()
             for part in parts:
                 template_title = part['title'].strip()
                 kind, gender, num = parse_template_title(template_title)
                 WordInflection.objects.bulk_add(
                     WordInflection(word=word, template=template_title,
                                    content=part['tpl'],
                                    gender=gender, kind=kind, num=num)
                 )
             WordInflection.objects.bulk_add()
             pass  # print '@@ inflection really changed', '+' * 40
             print '           @ inflection changed'
Ejemplo n.º 2
0
def parse_titles():
    for template in TemplateInflection.objects.filter(morph=u'сущ', lang='ru'):
        title = template.title
        print title
        kind, gender, num = parse_template_title(title)
        print kind, gender, num
        template.kind = kind
        template.gender = gender
        template.num = num
        template.save()
Ejemplo n.º 3
0
def process_template(article, lang):
    prefix = u'Шаблон:'
    title = article.title()
    if not title.startswith(prefix):
        print title, '-', 'BAD!', 'BAD!', '#' * 100
        return
    title = title[len(prefix):]
    print title
    article = pywikibot.Page(site, u"Шаблон:%s" % title)
    content = article.get()
    edited = convert_wiki_date(article.editTime())
    edited = make_aware(edited, pytz.UTC)

    words = title.split(' ')
    morph = words[0]
    if morph not in [u'adv', u'conj', u'interj', u'гл', u'глагол',
                     u'мест', u'прил', u'сущ', u'числ', u'падежи',
                     u'prep', u'affix', u'intro', u'phrase', u'suffix',
                     u'predic', u'склонение', u'part',
                     u'артикль', u'article', u'арт', u'деепр', u'onomatop',
                     u'interj1', u'прич', u'герундий',
                     u'склон', u'степени',
                     u'междом',
                     u'спряжения', u'спряжение', u'словоизм', u'сущ2',
                     u'принад', u'palat', u'abbrev', u'measure',
                     u'morph', u'prefix', u'ein',
                     u'союз', u'словоформы', u'глаг', u'послел',
                     u'послелог', u'падежи-мест', u'нар', u'морфема',
                     u'межд', ]:
        print u'm →', title
        return
    prefix = u"%s %s" % (morph, lang)
    if not title.startswith(prefix):
        print u'e →', title, '(%s)' % lang
        return
    info = title[len(prefix):].strip()

    kind, gender, num = parse_template_title(title)

    return TemplateInflection(
        title=title, content=content, edited=edited, lang=lang,
        morph=morph, info=info, kind=kind, gender=gender, num=num,
    )