def replace_redirects(pages, lang, flat=True):
    """Resolve redirects for a collection of page titles.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50, using an ``action=query`` request with the ``redirects`` parameter
    set.  Batches whose response carries a ``status_code`` attribute are
    reported as request errors and skipped.

    NOTE(review): a function with this same name is defined again later in
    this file; the later definition is the one that takes effect.

    Args:
        pages: Page titles to resolve.
        lang: Value passed to ``api`` to obtain the client.
        flat: When true (the default), return the resolved titles; when
            false, return the redirect mapping instead.

    Returns:
        If ``flat`` is true, a list (unordered, de-duplicated) of the input
        titles with every redirect source replaced by its target.
        Otherwise a dict mapping each redirect source title to its target.
    """
    pages = list(pages)
    results = set(pages)
    results_dict = {}
    params = {"redirects": "", "format": "json", "action": "query"}
    w = api(lang)
    batch_size = 50

    # Step through the titles by start offset.  The previous
    # ``ceil(len(pages) / 50)`` form used integer division on Python 2 and
    # therefore never sent the final, partially filled batch.
    for start in range(0, len(pages), batch_size):
        params["titles"] = "|".join(pages[start:start + batch_size])
        resp = w.post(params)
        if hasattr(resp, 'status_code'):
            click.secho("Request error", bg="red", fg="black")
            # Parenthesised single-argument form prints the same on
            # Python 2 and Python 3.
            print(resp)
            continue

        if "redirects" in resp["query"]:
            redirects = resp["query"]["redirects"]
            # Remove every source first, then add every target, so a title
            # that is both a source and a target stays in the result.
            results = results - set(r["from"] for r in redirects)
            results = results | set(r["to"] for r in redirects)
            # Record every redirect of the batch, not only the last one.
            for r in redirects:
                results_dict[r["from"]] = r["to"]

    if flat:
        return list(results)
    return results_dict
def replace_redirects(pages, lang, flat=True):
    """Resolve redirects for a collection of page titles.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50, using an ``action=query`` request with the ``redirects`` parameter
    set.  Batches whose response carries a ``status_code`` attribute are
    reported as request errors and skipped.

    NOTE(review): a function with this same name is also defined earlier
    in this file; this later definition is the one that takes effect.

    Args:
        pages: Page titles to resolve.
        lang: Value passed to ``api`` to obtain the client.
        flat: When true (the default), return the resolved titles; when
            false, return the redirect mapping instead.

    Returns:
        If ``flat`` is true, a list (unordered, de-duplicated) of the input
        titles with every redirect source replaced by its target.
        Otherwise a dict mapping each redirect source title to its target.
    """
    pages = list(pages)
    results = set(pages)
    results_dict = {}
    params = {"redirects": "", "format": "json", "action": "query"}
    w = api(lang)
    batch_size = 50

    # Step through the titles by start offset.  The previous
    # ``ceil(len(pages)/50)`` form used integer division on Python 2 and
    # therefore never sent the final, partially filled batch.
    for start in range(0, len(pages), batch_size):
        params["titles"] = "|".join(pages[start:start + batch_size])
        resp = w.post(params)
        if hasattr(resp, 'status_code'):
            click.secho("Request error", bg="red", fg="black")
            # Parenthesised single-argument form prints the same on
            # Python 2 and Python 3.
            print(resp)
            continue

        if "redirects" in resp["query"]:
            redirects = resp["query"]["redirects"]
            # Remove every source first, then add every target, so a title
            # that is both a source and a target stays in the result.
            results = results - set(r["from"] for r in redirects)
            results = results | set(r["to"] for r in redirects)
            # Record every redirect of the batch, not only the last one.
            for r in redirects:
                results_dict[r["from"]] = r["to"]

    if flat:
        return list(results)
    return results_dict
def api_bunch(page_titles, lang, req):
    """Run a query for many page titles and collect their ``langlinks``.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50.  For each batch the request is repeated, merging the response's
    ``continue`` values into the parameters, until a response arrives
    without a ``continue`` key.

    NOTE(review): a function with this same name is defined again later in
    this file; the later definition is the one that takes effect.

    Args:
        page_titles: Page titles to query.
        lang: Value passed to ``api`` to obtain the client.
        req: Base request parameters.  The dict is copied for every batch
            and is no longer modified in place.

    Returns:
        A ``defaultdict(list)`` mapping each returned page title to the
        accumulated entries of that page's ``langlinks`` field.
    """
    results = defaultdict(list)
    page_titles = list(page_titles)
    w = api(lang)
    batch_size = 50

    # Step through the titles by start offset.  The previous
    # ``ceil(len(page_titles) / 50)`` form used integer division on
    # Python 2 and therefore never sent the final, partially filled batch.
    for start in range(0, len(page_titles), batch_size):
        # Start every batch from a fresh copy of the base request so that
        # continuation values from the previous batch do not leak into this
        # one, and so the caller's dict is left untouched.
        param = dict(req)
        param["titles"] = "|".join(page_titles[start:start + batch_size])

        while True:
            r = w.get(param, method="post")
            if hasattr(r, 'status_code'):
                click.secho("Request error", bg="red", fg="black")
                # Parenthesised single-argument form prints the same on
                # Python 2 and Python 3.
                print(r)
                # NOTE(review): the same request is retried immediately,
                # with no retry limit or back-off; a persistent failure
                # loops forever.  Kept as in the original.
                continue

            for p in r["query"]["pages"].values():
                if "langlinks" in p:
                    results[p["title"]].extend(p['langlinks'])

            if "continue" not in r:
                break
            param.update(r["continue"])

    return results
def api_bunch(page_titles, lang, req):
    """Run a query for many page titles and collect their ``langlinks``.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50.  For each batch the request is repeated, merging the response's
    ``continue`` values into the parameters, until a response arrives
    without a ``continue`` key.

    NOTE(review): a function with this same name is also defined earlier
    in this file; this later definition is the one that takes effect.

    Args:
        page_titles: Page titles to query.
        lang: Value passed to ``api`` to obtain the client.
        req: Base request parameters.  The dict is copied for every batch
            and is no longer modified in place.

    Returns:
        A ``defaultdict(list)`` mapping each returned page title to the
        accumulated entries of that page's ``langlinks`` field.
    """
    results = defaultdict(list)
    page_titles = list(page_titles)
    w = api(lang)
    batch_size = 50

    # Step through the titles by start offset.  The previous
    # ``ceil(len(page_titles)/50)`` form used integer division on Python 2
    # and therefore never sent the final, partially filled batch.
    for start in range(0, len(page_titles), batch_size):
        # Start every batch from a fresh copy of the base request so that
        # continuation values from the previous batch do not leak into this
        # one, and so the caller's dict is left untouched.
        param = dict(req)
        param["titles"] = "|".join(page_titles[start:start + batch_size])

        while True:
            r = w.get(param, method="post")
            if hasattr(r, 'status_code'):
                click.secho("Request error", bg="red", fg="black")
                # Parenthesised single-argument form prints the same on
                # Python 2 and Python 3.
                print(r)
                # NOTE(review): the same request is retried immediately,
                # with no retry limit or back-off; a persistent failure
                # loops forever.  Kept as in the original.
                continue

            for p in r["query"]["pages"].values():
                if "langlinks" in p:
                    results[p["title"]].extend(p['langlinks'])

            if "continue" not in r:
                break
            param.update(r["continue"])

    return results