Ejemplo n.º 1
0
def crawl_history(direction: str, date=None, timeout: float = 30.0):
    """Download the migration history curve of every known city to disk.

    For each record returned by ``get_city_codes()`` the function sleeps
    ``SLEEP_SEC`` seconds, requests the ``historycurve.jsonp`` endpoint and,
    on HTTP 200, writes the raw response text to
    ``FilepathMapper.history(city_id, direction)``. Any other status code is
    logged as a warning and that city is skipped.

    Args:
        direction: ``"in"`` or ``"out"``; selects ``move_in`` / ``move_out``
            in the query string.
        date: Value for the ``endDate`` query parameter. When omitted,
            ``yesterday()`` is evaluated at call time (not at import time, so
            a long-running process does not keep a stale date).
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        ValueError: If ``direction`` is neither ``"in"`` nor ``"out"``.
        requests.RequestException: Propagated from ``requests.get`` (for
            example when the timeout expires).
    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if direction not in ("in", "out"):
        raise ValueError(
            f"direction must be 'in' or 'out', got {direction!r}"
        )
    if date is None:
        date = yesterday()

    city_code = get_city_codes()
    total = len(city_code)
    for i, city_record in enumerate(city_code, start=1):
        # Throttle before every request, including the first one.
        time.sleep(SLEEP_SEC)
        city_id = city_record["code"]
        logger.info(f"[{i}/{total}]: {city_record['city']} ({city_id})")

        query = (
            "https://huiyan.baidu.com/migration/historycurve.jsonp"
            f"?dt=city&id={city_id}&type=move_{direction}"
            f"&startDate=20200101&endDate={date}"
        )
        logger.info(f"Getting {query}")
        # Without a timeout a stalled connection would block the crawl forever.
        res = requests.get(query, timeout=timeout)
        if res.status_code != 200:
            logger.warning(
                f"Bad response code {res.status_code} for {city_record['city']}"
            )
            continue

        logger.info("Success.")
        # The raw JSONP text is stored as-is; readers unwrap it later.
        with open(FilepathMapper.history(city_id, direction),
                  "w",
                  encoding="utf-8") as f:
            f.write(res.text)
Ejemplo n.º 2
0
def update_history_if_outdated(direction, city_id):
    """Re-crawl the history data when the cached copy lacks yesterday's date.

    The cached history file of city ``"110000"`` is read and searched for the
    string returned by ``yesterday()``. If that string is absent, or the file
    does not exist yet, ``crawl_history(direction)`` is invoked.

    Args:
        direction: Forwarded to ``FilepathMapper.history`` and
            ``crawl_history``.
        city_id: Not used by this function; kept so existing callers keep
            working.
    """
    # NOTE(review): the probe file is hard-coded to "110000" instead of using
    # ``city_id`` -- presumably a sentinel, since crawl_history refreshes every
    # city in one pass. Confirm before switching to ``city_id``.
    path = FilepathMapper.history("110000", direction)
    try:
        with open(path, "r", encoding="utf-8") as f:
            cached = f.read()
    except FileNotFoundError:
        # Nothing has been crawled yet: treat as outdated rather than crash.
        cached = None

    if cached is None or yesterday() not in cached:
        logger.info("Obtaining the latest history data.")
        crawl_history(direction)
Ejemplo n.º 3
0
def load_history(date, city_id):
    """Load the cached inbound history list of one city.

    First calls ``update_history_if_outdated("in", city_id)``, then reads the
    file at ``FilepathMapper.history(city_id, "in")``, unwraps the JSONP
    envelope and returns the decoded ``["data"]["list"]`` value.

    Args:
        date: Only used in the log message.
        city_id: City whose history file is read.

    Returns:
        The ``["data"]["list"]`` entry of the decoded payload, or ``None``
        when the history file does not exist.

    Raises:
        json.JSONDecodeError: If the file content is not ``name(<json>)``.
    """
    update_history_if_outdated("in", city_id)
    path = FilepathMapper.history(city_id, "in")
    if not os.path.exists(path):
        return None

    logger.info(f"Reading <{city_id}> <{date}> history data")
    with open(path, "r", encoding="utf-8") as f:
        res = f.read()

    # Take the text between the FIRST "(" and the LAST ")". Splitting on the
    # last "(" and dropping one trailing character breaks as soon as the JSON
    # contains a parenthesis or the text ends with ";" or a newline.
    _, _, after_open = res.partition("(")
    payload, _, _ = after_open.rpartition(")")
    return json.loads(payload)["data"]["list"]
Ejemplo n.º 4
0
def load_p2p(date, city_id):
    """Load the cached inbound point-to-point list of one city and date.

    If the file at ``FilepathMapper.p2p(date, city_id, "in")`` is missing,
    ``crawl_p2p("in", date)`` is called first. The file is then read, the
    JSONP envelope is unwrapped and the decoded ``["data"]["list"]`` value is
    returned.

    Args:
        date: Date forwarded to ``FilepathMapper.p2p`` and ``crawl_p2p``.
        city_id: City whose point-to-point file is read.

    Returns:
        The ``["data"]["list"]`` entry of the decoded payload.

    Raises:
        FileNotFoundError: If the file is still missing after the crawl.
        json.JSONDecodeError: If the file content is not ``name(<json>)``.
    """
    path = FilepathMapper.p2p(date, city_id, "in")
    if not os.path.exists(path):
        logger.info("Obtaining the latest point to point data")
        crawl_p2p("in", date)

    logger.info(f"Reading <{city_id}> <{date}> point to point data")
    with open(path, "r", encoding="utf-8") as f:
        res = f.read()

    # Take the text between the FIRST "(" and the LAST ")". Splitting on the
    # last "(" and dropping one trailing character breaks as soon as the JSON
    # contains a parenthesis or the text ends with ";" or a newline.
    _, _, after_open = res.partition("(")
    payload, _, _ = after_open.rpartition(")")
    return json.loads(payload)["data"]["list"]