import json
import logging
import os
import time

import requests

logger = logging.getLogger(__name__)

# `yesterday`, `get_city_codes`, `SLEEP_SEC`, and `FilepathMapper` are assumed
# from the surrounding project; hedged stand-in sketches follow below.


def crawl_history(direction: str, date=None):
    """Download the full migration history curve for every known city."""
    assert direction in ["in", "out"]
    # Resolve the default at call time: a `date=yesterday()` default would be
    # evaluated once at import and go stale in a long-running process.
    if date is None:
        date = yesterday()
    city_codes = get_city_codes()
    total = len(city_codes)
    for i, city_record in enumerate(city_codes):
        time.sleep(SLEEP_SEC)  # throttle requests to be polite to the server
        city_id = city_record["code"]
        logger.info(f"[{i + 1}/{total}]: {city_record['city']} ({city_id})")
        query = (
            "https://huiyan.baidu.com/migration/historycurve.jsonp"
            + f"?dt=city&id={city_id}&type=move_{direction}"
            + f"&startDate=20200101&endDate={date}"
        )
        logger.info(f"Getting {query}")
        res = requests.get(query)
        if res.status_code == 200:
            logger.info("Success.")
            with open(FilepathMapper.history(city_id, direction), "w", encoding="utf-8") as f:
                f.write(res.text)
        else:
            logger.warning(
                f"Bad response code {res.status_code} for {city_record['city']}"
            )
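
# Hedged stand-ins for the helpers assumed above. These are minimal sketches,
# not the project's actual implementations: the sleep interval, city list,
# and file layout are placeholders chosen only to make the module runnable.
from datetime import datetime, timedelta

SLEEP_SEC = 1  # hypothetical politeness delay between requests


def yesterday() -> str:
    # Must emit the same YYYYMMDD format as the startDate/endDate parameters.
    return (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")


def get_city_codes():
    # Placeholder entry; the real project loads the full city table.
    return [{"city": "北京市", "code": "110000"}]


class FilepathMapper:
    # Hypothetical file layout; the real mapper may differ.
    @staticmethod
    def history(city_id: str, direction: str) -> str:
        return os.path.join("data", f"history_{direction}_{city_id}.jsonp")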
def update_history_if_outdated(direction, city_id):
    """Re-crawl every city's history if the stored data misses yesterday."""
    # `city_id` is accepted for symmetry with load_history, but Beijing
    # ("110000") serves as a freshness sentinel: crawl_history refreshes all
    # cities at once, so checking a single file is enough.
    path = FilepathMapper.history("110000", direction)
    if not os.path.exists(path):
        logger.info("No history data on disk yet; crawling from scratch.")
        crawl_history(direction)
        return
    with open(path, "r", encoding="utf-8") as f:
        res = f.read()
    if yesterday() not in res:
        logger.info("Obtaining the latest history data.")
        crawl_history(direction)
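
# The files written by crawl_history hold raw JSONP. Judging from the
# historycurve endpoint (an assumption, not verified here), a payload looks
# roughly like:
#
#     cb({"errno": 0, "errmsg": "SUCCESS",
#         "data": {"list": {"20200101": 5.76, "20200102": 4.31}}})
#
# load_history below therefore strips the callback wrapper before calling
# json.loads and returns data["list"], a YYYYMMDD -> scale-index mapping.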
def load_history(date, city_id):
    """Return the inbound history curve for `city_id`, or None if unavailable."""
    update_history_if_outdated("in", city_id)
    path = FilepathMapper.history(city_id, "in")
    if os.path.exists(path):
        logger.info(f"Reading <{city_id}> <{date}> history data")
        with open(path, "r", encoding="utf-8") as f:
            res = f.read()
        # Strip the JSONP callback wrapper (`cb({...})`): keep everything
        # after the last "(" and drop the trailing ")".
        return json.loads(res.split("(")[-1][:-1])["data"]["list"]
    return None
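
# Usage sketch: fetch Beijing's inbound curve and read one day's value.
# Assumes the date-keyed mapping described above; network access is needed
# on first run so crawl_history can populate the local files.
if __name__ == "__main__":
    history = load_history("20200301", "110000")  # Beijing
    if history is not None:
        print(history.get("20200301"))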