def check(self, articledict):
    """Mark each article in ``articledict`` with an ``"IsStub"`` flag.

    Templates are fetched via ``get_all_templates`` for the roots
    ``Category:Formal_languages`` and ``Category:Computer_file_formats``
    and merged into one title -> templates mapping (the second result
    overrides the first on duplicate titles).  Every article starts with
    ``IsStub = 0``; an article is switched to ``1`` when at least one of
    its template names contains ``"-stub"`` (case-insensitive).

    Returns the same ``articledict`` object, mutated in place.
    """
    templates_by_title = get_all_templates(
        root=to_uri("Category:Formal_languages"))
    templates_by_title.update(
        get_all_templates(root=to_uri("Category:Computer_file_formats")))

    # Default: nothing is a stub until a matching template is found.
    for title in articledict:
        articledict[title]["IsStub"] = 0

    for title, templates in templates_by_title.items():
        if title not in articledict:
            continue
        for template in templates:
            if "-stub" in template.lower():
                articledict[title]["IsStub"] = 1
                break
    return articledict
def check(self, articledict):
    """Set ``"IsStub"`` to 0 or 1 on every entry of ``articledict``.

    The title -> templates mapping is the merge of ``get_all_templates``
    for ``Category:Computing_platforms`` and ``Category:Software`` (later
    result wins on duplicate titles).  An article counts as a stub when
    any of its template names contains ``"-stub"`` after lower-casing.

    Returns the same ``articledict`` object, mutated in place.
    """
    print("Checking for stubs...")
    template_index = get_all_templates(
        root=to_uri("Category:Computing_platforms"))
    template_index.update(get_all_templates(root=to_uri("Category:Software")))

    # Reset the flag everywhere before marking the actual stubs.
    for title in articledict:
        articledict[title]["IsStub"] = 0

    for title, names in template_index.items():
        if title not in articledict:
            continue
        has_stub_template = any("-stub" in name.lower() for name in names)
        if has_stub_template:
            articledict[title]["IsStub"] = 1
    return articledict
def init_cat_subcat():
    """Build and return the category dictionary with sub/supercategory links.

    For every root in ``ROOTS`` and every level ``i`` in ``0..DEPTH`` the
    helper ``category_to_subcategory_below(to_uri(root), i, i)`` is queried;
    its result is iterated as a category -> subcategories mapping.

    Keys written into each category's property dict:
      * ``"<root>Depth"`` -- level at which the category was recorded for
        that root (``i`` for the category itself, ``i + 1`` for a subcategory
        that has no depth for this root yet).
      * ``"subcats"``     -- the subcategory collection from the first result
        that mentioned the category as a key.
      * ``"supercats"``   -- list of categories that listed it as a subcategory.

    NOTE(review): this block was reconstructed from a source whose line
    breaks and indentation were lost; the nesting below is the most plausible
    reading -- confirm against the original file.
    """
    print("Mining subcategories of categories")
    catdict = dict()
    for c in ROOTS:
        for i in range(DEPTH + 1):
            # presumably (i, i) restricts the query to exactly level i -- TODO confirm
            d2 = category_to_subcategory_below(to_uri(c), i, i)
            for cat, subcats in d2.items():
                if cat not in catdict:
                    catdict[cat] = dict()
                    # NOTE(review): assumed to belong to the `if` suite (depth
                    # recorded only when the entry is created) -- confirm.
                    catdict[cat][c + "Depth"] = i
                # Keep the first subcategory collection seen for this category.
                if "subcats" not in catdict[cat]:
                    catdict[cat]["subcats"] = subcats
                # Make sure every subcategory has an entry and a depth for this root.
                for sc in subcats:
                    if sc not in catdict:
                        catdict[sc] = dict()
                    if c + "Depth" not in catdict[sc]:
                        catdict[sc][c + "Depth"] = i + 1
                # Record `cat` as a supercategory of each of its subcategories.
                for subcat in subcats:
                    # NOTE(review): the loop above already inserted every
                    # subcat into catdict, so this first branch looks unreachable.
                    if subcat not in catdict:
                        catdict[subcat] = dict()
                        catdict[subcat]["supercats"] = [cat]
                    else:
                        if "supercats" not in catdict[subcat]:
                            catdict[subcat]["supercats"] = [cat]
                        else:
                            catdict[subcat]["supercats"].append(cat)
    return catdict
def init_cat_supercat(catdict):
    """Store the supercategories of each category under ``"supercats"``.

    For every root in ``ROOTS`` and every level ``0..DEPTH`` the mapping
    returned by ``category_to_supercategory_below`` is walked, and each
    category's ``"supercats"`` entry in ``catdict`` is replaced by the
    returned value.

    Raises ``KeyError`` if a returned category is not already a key of
    ``catdict``.  Returns the same ``catdict`` object, mutated in place.
    """
    print("Mining supercategories of categories")
    for root in ROOTS:
        for level in range(DEPTH + 1):
            supercats_by_cat = category_to_supercategory_below(
                to_uri(root), level, level)
            for category, parents in supercats_by_cat.items():
                entry = catdict[category]
                entry["supercats"] = parents
    return catdict
def add_function(articledict, fun, name):
    """Attach the results of ``fun`` to matching articles under key ``name``.

    ``fun`` is called as ``fun(to_uri(root), 0, DEPTH)`` for every root in
    ``ROOTS``; the returned mappings are merged (later roots override
    earlier ones on duplicate keys).  For each article title present in
    both ``articledict`` and the merged mapping, the mapped value is
    stored at ``articledict[title][name]``.

    Returns the same ``articledict`` object, mutated in place.
    """
    print("Mining " + name)
    mined = {}
    for root in ROOTS:
        mined.update(fun(to_uri(root), 0, DEPTH))

    for title, props in articledict.items():
        if title not in mined:
            continue
        props[name] = mined[title]
    return articledict
def init_articledict():
    """Create the article dictionary with per-root depth of first appearance.

    For every root in ``ROOTS`` the levels ``0..DEPTH`` are queried in
    ascending order through ``articles_below(to_uri(root), level, level)``.
    Each returned title gets an entry, and the key ``"<root>Depth"`` is set
    to the first level at which the title was returned for that root.

    Returns the newly built dictionary (title -> property dict).
    """
    print("Mining article names and depth of first appearance")
    articledict = {}
    for root in ROOTS:
        for level in range(DEPTH + 1):
            for title in articles_below(to_uri(root), level, level):
                # setdefault keeps an existing entry / existing depth untouched,
                # so only the first (shallowest) level is recorded.
                props = articledict.setdefault(title, {})
                props.setdefault(root + "Depth", level)
    return articledict
def check(self, catdict, artdict):
    """Set the ``"Eponymous"`` flag (0 or 1) on every category in ``catdict``.

    A category is flagged 1 when its name is also a key of ``artdict``.
    In addition, for every root in ``ROOTS`` the mapping returned by
    ``articles_with_commons(to_uri(root), 0, DEPTH)`` is scanned, and each
    category listed in its values that exists in ``catdict`` is flagged 1.

    Returns the same ``catdict`` object, mutated in place.
    """
    print("Checking for Eponymous")
    for name, props in catdict.items():
        props["Eponymous"] = int(name in artdict)

    # TODO: http://live.dbpedia.org/property/commons for Java : Category:Java (en)
    for root in ROOTS:
        commons = articles_with_commons(to_uri(root), 0, DEPTH)
        for article in commons:
            for linked_cat in commons[article]:
                if linked_cat not in catdict:
                    continue
                catdict[linked_cat]["Eponymous"] = 1
    return catdict