def main():
    """Sequence the main items and their sub-range items, then set labels.

    One item is looked up per title in MAINS and the resulting list is handed
    to make_sequence.  For each main range, a prefix is taken from its lower
    bound (everything but the last four characters), one title is generated
    per line of LESSER, and the matching items are sequenced in turn.
    Finally set_for_lang is called three times on every collected item (main
    items first, then the sub-range items group by group).
    """
    def bounds_of(page_title):
        """Return the 2nd and 3rd fields of a title split on '(', ')', '-'."""
        pieces = re.split(u"[()-]", page_title)
        return pieces[1], pieces[2]

    def subrange_title(prefix, line):
        """Return frtitle() of the two space-separated fields, prefixed."""
        low = '{}{}'.format(prefix, line.split(" ")[0])
        high = '{}{}'.format(prefix, line.split(" ")[1])
        return frtitle(low, high)

    # The main articles: 7 main ranges, each separated into subranges.
    top_items = [item_by_title("fr", name) for name in MAINS]
    top_ranges = [bounds_of(name) for name in MAINS]
    make_sequence(top_items)

    # Accumulate into copies so the lists being iterated are never mutated.
    every_item = list(top_items)
    every_range = list(top_ranges)

    for low_bound, _high_bound in top_ranges:
        prefix = low_bound[:-4]
        print("====================='{}'========================".format(prefix))

        sub_titles = [
            subrange_title(prefix, line) for line in LESSER.split("\n")
        ]
        sub_items = [item_by_title("fr", name) for name in sub_titles]
        sub_ranges = [bounds_of(name) for name in sub_titles]
        make_sequence(sub_items)

        every_item.extend(sub_items)
        every_range.extend(sub_ranges)

    summary = "ambiguity and label correction"
    for entity, (low_bound, high_bound) in zip(every_item, every_range):
        set_for_lang(
            entity, u"Table des caractères Unicode", "fr",
            label(low_bound, high_bound), summary)
        set_for_lang(
            entity, u"", "en",
            enlabel(low_bound, high_bound), summary)
        # Apparently corrects the result of an earlier, buggy run.
        set_for_lang(
            entity, u"Unicode characters from 100000 to 10FFFF codepoints",
            "en", enlabel(low_bound, high_bound), summary)
def treat_serie(serie_name, site_name='en', main_page_name=None, num=None,
                start_year=None, title_pattern="{}_{}"):
    """Find the season pages of a series and link their data items.

    Season pages are probed one after another under the title
    ``title_pattern.format(serie_name, year)``; ``year`` starts at
    ``start_year`` and grows by one per season found.  Probing stops at the
    first page that does not exist, or once ``num - 1`` seasons were found.
    Every season item then gets its labels set through set_season_labels,
    is reloaded, and receives a P361 ("part of") claim pointing at the
    series item.  The season items are finally passed, in season order, to
    wd_lib.make_sequence together with the item of the French page
    "Saison (télévision)".

    Args:
        serie_name: name of the series, used to build the season titles.
        site_name: site code given to pywikibot.getSite.
        main_page_name: title of the series page; falls back to
            ``serie_name`` when falsy.
        num: probe limit; a falsy value (None, 0) means 1000.
        start_year: value formatted into the first season title.  It is
            incremented with ``+ 1`` after each season found, so leaving it
            at None raises TypeError as soon as one season page exists.
        title_pattern: format string receiving ``serie_name`` and ``year``.

    Raises:
        Exception: a season page exists but its data page does not.
    """
    if not main_page_name:
        main_page_name = serie_name

    site = pywikibot.getSite(site_name)
    print("Serie : {}, Page: {}".format(serie_name, main_page_name))
    serie_item = wd_lib.item_by_title(site, main_page_name)

    if not num:
        num = 1000

    # Seasons are kept in an ordered list (seasons[0] is season 1) so that
    # the order handed to make_sequence never depends on dict iteration order.
    seasons = []
    year = start_year
    while len(seasons) + 1 < num:
        title = title_pattern.format(serie_name, year)
        pywikibot.output("searching article : {}".format(title))
        page = pywikibot.Page(site, title)
        print(title)
        if not page.exists():
            break
        datapage = pywikibot.DataPage(page)
        if not datapage.exists():
            raise Exception(
                "season page has no data page: {}".format(title))
        datapage.get()
        seasons.append(datapage)
        year = year + 1

    print("Number of seasons : {}".format(len(seasons)))

    year = start_year
    for index, season in enumerate(seasons):
        number = index + 1
        print("season {}, item: {}".format(number, season))
        set_season_labels(season, serie_name, number, year)
        year = year + 1
        # part of (P361): this item is a part of that item
        season = wd_lib.reloaditempage(season)
        seasons[index] = season
        wd_lib.maybe_set_claim(season, 361, serie_item)

    type_item = wd_lib.item_by_title("fr", u"Saison (télévision)")
    print(type_item)
    # make_sequence used to receive an iterator; keep handing it one.
    wd_lib.make_sequence(iter(seasons), type_item)