Example #1
0
class DataCrawl:
    """Download quarterly trade-history pages and store unseen price rows.

    For every record returned by ``DBManager.get_code_list()`` the crawler
    fetches ``http://quotes.money.163.com/trade/lsjysj_<code>.html`` for one
    year/season, parses the table rows inside ``div.inner_box`` and appends
    each row whose ``cur_timer`` is not already stored for that code.
    """

    # Number of download attempts per URL before giving up on that code.
    MAX_TRIES = 8
    # Seconds to wait between two failed attempts.
    RETRY_DELAY = 2
    # Seconds before a stalled HTTP request is abandoned; without a timeout
    # ``requests`` may wait forever and block the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Open the ``wm_details`` store and prepare the HTTP headers."""
        self.dm = DBManager("wm_details")
        self.headers = {
            # The value previously started with a stray ":" which made it an
            # invalid browser signature.
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/59.0.3071.115 Safari/537.36"
            ),
        }

    def start_crawl(self):
        """Crawl the year/season reported by ``get_cur_season()``."""
        print("-start up-")
        _year, _season = get_cur_season()
        self.get_url(_year, _season)

    def end_crawl(self):
        """Close the database handle opened in ``__init__``."""
        self.dm.close_db()

    def get_url(self, year, season):
        """Fetch and parse the history page of every known code.

        Args:
            year: Year placed in the ``year`` query parameter.
            season: Value placed in the ``season`` query parameter.

        ``year`` and ``season`` are formatted into the URL, so both strings
        and integers are accepted. The code-list cursor is closed even when
        processing a record raises.
        """
        code_list = self.dm.get_code_list()
        try:
            for item in code_list:
                # Only the first six characters of the stored code are used
                # in the URL; the full code is the database key.
                key = item["code"][:6]
                url = (
                    "http://quotes.money.163.com/trade/lsjysj_"
                    "{}.html?year={}&season={}".format(key, year, season)
                )
                print(url)
                self._download_and_parse(url, item["code"])
        finally:
            code_list.close()

    def _download_and_parse(self, url, code):
        """Download *url* with retries and pass the body to ``parse_pager``."""
        for tries in range(self.MAX_TRIES):
            try:
                response = requests.get(
                    url,
                    headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT,
                )
                self.parse_pager(response.content, code)
                return
            except Exception:
                # Best-effort crawl: a failing code must not stop the others.
                if tries < self.MAX_TRIES - 1:
                    sleep(self.RETRY_DELAY)
                else:
                    print("ERROR TO DOWNLODE DATE")

    def parse_pager(self, content, key):
        """Parse one downloaded page and store rows not yet recorded.

        Args:
            content: Raw page body handed to BeautifulSoup (``lxml`` parser).
            key: Identifier passed to ``find_by_id`` / ``add_tk_item``.

        Any exception is caught and reported as ``<key> faild`` so that one
        bad page does not abort the crawl.
        """
        try:
            _result = self.dm.find_by_id(key)
            # A set gives constant-time duplicate checks per parsed row.
            known_timers = {x["cur_timer"] for x in _result["price_list"]}
            soup = bs4.BeautifulSoup(content, "lxml")
            rows = soup.select("div.inner_box tr")
            # The first selected row is skipped (presumably the table header).
            for row in rows[1:]:
                data = [cell.string for cell in row.select("td")]
                # Column 5 of the table row is not stored.
                price = {
                    "cur_timer": data[0],
                    "cur_open_price": data[1],
                    "cur_max_price": data[2],
                    "cur_min_price": data[3],
                    "cur_close_price": data[4],
                    "cur_price_range": data[6],
                    "cur_total_volume": data[7],
                    "cur_total_money": data[8],
                }
                if price["cur_timer"] not in known_timers:
                    self.dm.add_tk_item(key, price)
                    # Remember it so a repeated row on the same page is not
                    # inserted twice.
                    known_timers.add(price["cur_timer"])
            print(key, "success")
        except Exception:
            print(key, "faild")
Example #2
0
 def update_data(self):
     """Hand each code's price history, newest first, to ``set_data``.

     Reads the code list from the ``wm_details`` store, loads each record
     by its full code, orders ``price_list`` descending by the
     ``cmp_datatime`` key and calls ``self.set_data`` with the six-character
     code prefix. A failing record is reported and skipped.
     NOTE(review): the log markers mention redis; presumably ``set_data``
     writes to a Redis cache, which is not visible here.
     """
     print("-redis_start-")
     manager = DBManager("wm_details")
     for entry in manager.get_code_list():
         try:
             full_code = entry["code"]
             short_code = full_code[:6]
             record = manager.find_by_id(full_code)
             history = list(record["price_list"])
             history.sort(key=cmp_datatime, reverse=True)
             self.set_data(short_code, history)
         except Exception:
             # Best-effort sync: report the code and move on to the next one.
             print("redis_error:501 " + str(entry["code"]))
     print("-redis_close-")
 def update_data(self):
     """Hand each code's price history, newest first, to ``set_data``.

     Reads the code list from the ``tk_details`` store, loads each record
     by its full code, orders ``price_list`` descending by the
     ``cmp_datatime_02`` key and calls ``self.set_data`` with the
     six-character code prefix. Start, end and per-record failures are
     reported through ``add_info_logs`` / ``add_error_logs``.
     """
     add_info_logs("redis_start", "-开始同步缓存-")
     manager = DBManager("tk_details")
     for entry in manager.get_code_list():
         try:
             full_code = entry["code"]
             short_code = full_code[:6]
             record = manager.find_by_id(full_code)
             history = list(record["price_list"])
             history.sort(key=cmp_datatime_02, reverse=True)
             self.set_data(short_code, history)
         except Exception:
             # Best-effort sync: log the failing code and keep going.
             add_error_logs("redis_error", "501", entry["code"])
     add_info_logs("redis_close", "-结束同步缓存-")