def tpl_action(self, page, tpl, title, morph, lang, params): # if self.i > 20000: # self.stop() tpl_title = title.strip() # print page.title, '-', title # print params call_params, call_numeric = process_call_params(params) # for key, value in call_params.items(): # print key mojo = u'можо' in call_params rod = u'род' in call_params def add_key_value(key, value, page_title): report.setdefault(tpl_title, dict()) report[tpl_title].setdefault(key, dict()) report[tpl_title][key].setdefault(value, list()) report[tpl_title][key][value].append(page_title) if mojo or rod: print page.title, '-', tpl_title if mojo: # print u'можо =', call_params[u'можо'] # self.mojos.append(tpl_title) add_key_value(key=u'можо', value=call_params[u'можо'], page_title=page.title) if rod: # print u'род =', call_params[u'род'] # self.rods.append(tpl_title) add_key_value(key=u'род', value=call_params[u'род'], page_title=page.title)
def tpl_action(self, page, old_tpl, title, morph, lang, params): call_params, call_numeric = process_call_params(params) if u"соотв" in call_params: if not call_params[u"соотв"]: return # print '-' * 40 print page.title, " -> ", call_params[u"соотв"]
def make_changes(self, page, old_tpl, title, morph, lang, params): tpl_title = title.strip() call_params, call_numeric = process_call_params(params) if tpl_title in selected_tpls and u'можо' in call_params: print page.title, tpl_title old_value = call_params[u'можо'] print u'можо =', old_value if old_value == u'можо': new_value = u'общ' elif old_value == u'мо' or old_value == u'м': new_value = u'муж' elif old_value == u'мосо': new_value = u'мс' else: raise Exception('Never should happen') return old_tpl.replace(u'|можо=%s\n' % old_value, u'|род=%s\n' % new_value) return old_tpl
def tpl_action(self, page, tpl, title, morph, lang, params): if self.i > 10000: self.stop() title = title.strip() print page.title, '-', title # print params self.table.setdefault(title, dict(count=0, mojo_count=0, params=set(), mojo_values=set())) self.table[title]["count"] += 1 call_params, call_numeric = process_call_params(params) mojo = u'можо' in call_params # rod = u'род' in call_params if mojo: self.table[title]["mojo_count"] += 1 self.table[title]["mojo_values"].add(call_params[u'можо']) for key, value in call_params.items(): print key self.table[title]["params"].add(key) print
def tpl_action(self, page, tpl, title, morph, lang, params): title = title.strip() # empty_templates = [ # u"сущ ru m ina", u"сущ ru f ina", u"сущ ru n ina", # u"сущ ru m a", u"сущ ru f a", u"сущ ru n a", # ] # if title in empty_templates: # print page.title #if morph != u'сущ': # continue # print '-' * 80 # output # self.counter += 1 # if not self.counter % 50: # save_wiki_page(u"Участник:Vitalik/Словоформы/v2/А1/%s" % self.n, # self.content, u"Получение списка словоформ") # self.n += 1 # self.content = '' # # print "\n\n== [[%s]] ==" % page.title # self.content += "\n\n== [[%s]] ==\n" % page.title # print title # return #continue call_params, call_numeric = process_call_params(params) try: template = TemplateInflection.objects.get(title=title) except ObjectDoesNotExist: return # todo: process templates redirects ("гл ru 4b-бСВ" → "гл ru 4b-лСВ") if not template.forms: print title, '- maybe wrong template' return print title tpl_forms = get_dict_from_text(template.forms) tpl_params = get_dict_from_text(template.params) for key, value in tpl_params.items(): tpl_params[key] = universal_process_template(value, call_params) for key, value in tpl_forms.items(): value = universal_process_template(value, call_params) value = process_template(value, tpl_params, key, morph) value = divide_words(value) tpl_forms[key] = value form_results = dict() for key, values in tpl_forms.items(): if morph == u'сущ': if key == 'nom-sg': value = values[0] if remove_stress(value) and page.title != remove_stress(value): pass # todo: мсправить их все-таки # print # print "https://ru.wiktionary.org/wiki/%s" % urllib.quote_plus(page.title.encode('utf-8')) # print page.title # print remove_stress(value) # print repr(page.title) # print repr(remove_stress(value)) for key, values in tpl_forms.items(): # if re.search('[a-z]', ' '.join(values)): #todo! find them! # print values if not values or len(values) == 1 and not values[0]: continue form_params = get_form_params(morph, key, tpl_params) if not form_params: continue for value in values: if remove_stress(value) == page.title: continue form_results.setdefault(value, list()) form_results[value].append(form_params.copy()) # todo: если полностью совпали, то тоже удалять join_form_results(morph, form_results) db_forms = list() for value, items in form_results.items(): for form_params in items: if not form_params: continue form_template = u"{{Форма-%s\n|язык=ru\n|база=%s\n" % (morph, page.title) for param_name, param_value in form_params.items(): form_template += "|%s=%s\n" % (param_name, param_value) form_template += u"|слоги={{по-слогам|%s}}\n" % value form_template += "}}" # output # # print "'''[[%s]]'''" % remove_stress(value) # # print form_template # self.content += "'''[[%s]]'''\n" % remove_stress(value) # self.content += "%s\n" % form_template # # print # db_form, created = WordForm.objects.get_or_create( # # if created: # WordForm.objects.create( # title=remove_stress(value), # base=page.title, # value=value, # template=form_template # ) db_form = WordForm( title=remove_stress(value), base=page.title, value=value, template=form_template ) db_forms.append(db_form) # db_counter += 1 # if len(db_forms) > 1000: # WordForm.objects.bulk_create(db_forms) # print dt(), '> forms added:', db_counter # db_forms = [] WordForm.objects.bulk_create(db_forms)