Beispiel #1
0
def replace_redirects(pages, lang, flat=True):
    """Swap redirecting page titles for the titles they redirect to.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50 with the ``redirects`` query flag set.
    NOTE(review): the parameters look like a MediaWiki ``action=query``
    request -- confirm against ``api``.

    Args:
        pages: Sliceable sequence of page title strings.
        lang: Passed straight to ``api``; presumably a language code.
        flat: When true, return the resolved titles; otherwise return the
            redirect mapping.

    Returns:
        If ``flat``: a list of the unique titles from ``pages`` with every
        redirect source replaced by its target (built from a set, so the
        input order is not preserved). Otherwise: a dict mapping each
        redirect source title to its target title.

    A batch whose response carries a ``status_code`` attribute is treated
    as a failed request: it is reported and skipped, so the titles of that
    batch are returned unresolved.
    """
    batch_size = 50

    results = set(pages)
    results_dict = {}

    params = {"redirects": "", "format": "json", "action": "query"}

    w = api(lang)

    # Step through the titles by offset instead of computing
    # ceil(len / 50): under Python 2 integer division that expression
    # floors first and silently drops the final partial batch.
    for start in range(0, len(pages), batch_size):
        params["titles"] = "|".join(pages[start:start + batch_size])
        resp = w.post(params)

        if hasattr(resp, 'status_code'):
            click.secho("Request error", bg="red", fg="black")
            print(resp)
            continue

        if "redirects" in resp["query"]:
            redirects = resp["query"]["redirects"]

            # Remove all sources first, then add all targets, so a title
            # that is both a source and a target within the batch is kept.
            results -= set(r["from"] for r in redirects)
            results |= set(r["to"] for r in redirects)

            # Record every redirect of the batch, not only the last one.
            for r in redirects:
                results_dict[r["from"]] = r["to"]

    if flat:
        return list(results)
    return results_dict
def replace_redirects(pages, lang, flat=True):
  """Swap redirecting page titles for the titles they redirect to.

  Titles are sent to the client returned by ``api(lang)`` in batches of
  50 with the ``redirects`` query flag set.
  NOTE(review): the parameters look like a MediaWiki ``action=query``
  request -- confirm against ``api``.

  Args:
    pages: Sliceable sequence of page title strings.
    lang: Passed straight to ``api``; presumably a language code.
    flat: When true, return the resolved titles; otherwise return the
      redirect mapping.

  Returns:
    If ``flat``: a list of the unique titles from ``pages`` with every
    redirect source replaced by its target (built from a set, so the
    input order is not preserved). Otherwise: a dict mapping each
    redirect source title to its target title.

  A batch whose response carries a ``status_code`` attribute is treated
  as a failed request: it is reported and skipped, so the titles of that
  batch are returned unresolved.
  """
  batch_size = 50

  results = set(pages)

  results_dict = {}

  params = {
    "redirects": "",
    "format": "json",
    "action": "query"
  }

  w = api(lang)

  # Step through the titles by offset instead of computing
  # ceil(len / 50): under Python 2 integer division that expression
  # floors first and silently drops the final partial batch.
  for start in range(0, len(pages), batch_size):
    params["titles"] = "|".join(pages[start:start + batch_size])
    resp = w.post(params)

    if hasattr(resp, 'status_code'):
      click.secho("Request error", bg="red", fg="black")
      print(resp)
      continue

    if "redirects" in resp["query"]:
      redirects = resp["query"]["redirects"]

      # Remove all sources first, then add all targets, so a title that
      # is both a source and a target within the batch is kept.
      results -= set(r["from"] for r in redirects)
      results |= set(r["to"] for r in redirects)

      # Record every redirect of the batch, not only the last one.
      for r in redirects:
        results_dict[r["from"]] = r["to"]

  if flat:
    return list(results)
  return results_dict
Beispiel #3
0
def api_bunch(page_titles, lang, req):
    """Run a query for many page titles and collect their ``langlinks``.

    Titles are sent to the client returned by ``api(lang)`` in batches of
    50. For each batch the request is repeated for as long as the response
    contains a ``continue`` entry, merging that entry into the request.
    NOTE(review): this looks like MediaWiki query continuation -- confirm
    against ``api``.

    Args:
        page_titles: Sliceable sequence of page title strings.
        lang: Passed straight to ``api``; presumably a language code.
        req: Mapping of base request parameters. It is copied for every
            batch and never modified.

    Returns:
        A ``defaultdict(list)`` mapping each returned page title to the
        concatenated ``langlinks`` entries found for it. Pages without a
        ``langlinks`` key are omitted.
    """
    batch_size = 50

    results = defaultdict(list)

    w = api(lang)

    # Step through the titles by offset instead of computing
    # ceil(len / 50): under Python 2 integer division that expression
    # floors first and silently drops the final partial batch.
    for start in range(0, len(page_titles), batch_size):
        # Fresh copy per batch: keeps the caller's dict untouched and stops
        # continuation tokens of one batch from leaking into the next.
        param = dict(req)
        param["titles"] = "|".join(page_titles[start:start + batch_size])

        while True:
            r = w.get(param, method="post")

            if hasattr(r, 'status_code'):
                click.secho("Request error", bg="red", fg="black")
                print(r)
                # NOTE(review): the same request is retried immediately and
                # without an upper bound; a persistent failure never ends.
                continue

            for p in r["query"]["pages"].values():
                if "langlinks" in p:
                    results[p["title"]].extend(p['langlinks'])

            if "continue" not in r:
                break
            param.update(r["continue"])

    return results
def api_bunch(page_titles, lang, req):
  """Run a query for many page titles and collect their ``langlinks``.

  Titles are sent to the client returned by ``api(lang)`` in batches of
  50. For each batch the request is repeated for as long as the response
  contains a ``continue`` entry, merging that entry into the request.
  NOTE(review): this looks like MediaWiki query continuation -- confirm
  against ``api``.

  Args:
    page_titles: Sliceable sequence of page title strings.
    lang: Passed straight to ``api``; presumably a language code.
    req: Mapping of base request parameters. It is copied for every
      batch and never modified.

  Returns:
    A ``defaultdict(list)`` mapping each returned page title to the
    concatenated ``langlinks`` entries found for it. Pages without a
    ``langlinks`` key are omitted.
  """
  batch_size = 50

  results = defaultdict(list)

  w = api(lang)

  # Step through the titles by offset instead of computing
  # ceil(len / 50): under Python 2 integer division that expression
  # floors first and silently drops the final partial batch.
  for start in range(0, len(page_titles), batch_size):
    # Fresh copy per batch: keeps the caller's dict untouched and stops
    # continuation tokens of one batch from leaking into the next.
    param = dict(req)
    param["titles"] = "|".join(page_titles[start:start + batch_size])

    while True:
      r = w.get(param, method="post")

      if hasattr(r, 'status_code'):
        click.secho("Request error", bg="red", fg="black")
        print(r)
        # NOTE(review): the same request is retried immediately and
        # without an upper bound; a persistent failure never ends.
        continue

      for p in r["query"]["pages"].values():
        if "langlinks" in p:
          results[p["title"]].extend(p['langlinks'])

      if "continue" not in r:
        break
      param.update(r["continue"])

  return results