def insert(self, text=None):
    """Insert one document, or a list of documents, into ``self.collection``.

    Failures are logged rather than raised, so the method always returns
    ``None``.

    Args:
        text: A document (mapping) or a list/tuple of documents. When it is
            omitted there is nothing to insert and a failure is logged.

    NOTE(review): assumes ``self.collection`` is a PyMongo 3.x+ ``Collection``
    (a sibling method already relies on ``find_one_and_delete``) -- confirm.
    """
    if text is None:
        # The default used to be the ``dict`` type object itself, which the
        # driver can never store; treat "no document" as an explicit failure.
        logger.error("no document given to insert")
        logger.error("insert failed")
        return
    try:
        # insert_one/insert_many replace the legacy Collection.insert, which
        # accepted either form and no longer exists in PyMongo 4.
        if isinstance(text, (list, tuple)):
            self.collection.insert_many(text)
        else:
            self.collection.insert_one(text)
    except Exception as e:
        logger.error(e)
        logger.error("insert failed")
    else:
        logger.info("insert success!")
def run(self):
    """Walk through ``self.page`` loop steps, handling two result pages per step.

    Step ``k`` (counting from 0) covers the page pair ``(2k + 1, 2k + 2)``,
    i.e. (1, 2), (3, 4), ...  The odd page is fetched with ``self.search``
    and the even page with ``self.search_half``; each result is handed to
    ``self.list_resume``.  A one-second pause precedes every pair.
    """
    logger.info(f"search_run_start:{self.keyword, self.page}")
    for step in range(self.page):
        odd_page = 2 * step + 1
        even_page = odd_page + 1
        time.sleep(1)
        self.list_resume(self.search(odd_page))
        self.list_resume(self.search_half(even_page))
def alarm(h, m=0, s=0):
    """Block until the local wall-clock time reaches today's ``h:m:s``.

    The target is built from the local date at call time.  If that moment
    has already passed, the function returns immediately; otherwise it polls
    every 3 seconds.

    Args:
        h: Hour of the target time.
        m: Minute of the target time.
        s: Second of the target time.

    Returns:
        ``True`` once the current time is at or past the target.

    Raises:
        ValueError: If ``h:m:s`` cannot be parsed as ``%H:%M:%S``.

    Example:
        alarm(14, 28, 0)
    """
    # Compute the target once, up front.  Recomputing it on every iteration
    # would re-read the date, so a wait that crosses midnight would push the
    # target a full day ahead and skip the intended trigger.
    str_time = time.strftime("%Y-%m-%d", time.localtime()) + f" {h}:{m}:{s}"
    timeStamp = int(time.mktime(time.strptime(str_time, "%Y-%m-%d %H:%M:%S")))

    now_time = time.time()
    now_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    logger.info(f"set clock at :{str_time}---{timeStamp}")
    logger.info(f"now time is :{now_str}---{now_time}")

    while time.time() < timeStamp:
        time.sleep(3)
        logger.info("dadada~")

    logger.info("开始执行下面函数")
    return True
def search(self, query=None, type=1, count=10):
    """Look up one document, or a limited set of documents, in ``self.collection``.

    Args:
        query: Filter handed to the collection; ``None`` applies no filter.
        type: ``1`` fetches a single document with ``find_one``; any other
            value runs ``find(query).limit(count)``.
        count: Maximum number of documents for the multi-document branch.

    Returns:
        For ``type == 1``: the matching document, or ``None`` when nothing
        matches.  Otherwise: the object returned by ``find(...).limit(...)``
        (presumably a PyMongo cursor yielding dict documents -- confirm).
    """
    if type == 1:
        # Pass the caller's filter through; calling find_one() without it
        # would return an arbitrary document regardless of ``query``.
        doc = self.collection.find_one(query)
        if not doc:
            return None
        logger.info("search success!")
        return doc

    array = self.collection.find(query).limit(count)
    # NOTE(review): a PyMongo cursor is presumably always truthy, so this
    # guard likely never fires -- kept for callers that may rely on it.
    if not array:
        return None
    logger.info("search success!")
    return array
def update(self, query=None, new_text=None, multi=False):
    """Upsert the fields of ``new_text`` into document(s) matching ``query``.

    The fields are applied with ``$set`` and ``upsert=True``, so a missing
    document is created.  Failures are logged rather than raised; the method
    always returns ``None``.

    Args:
        query: Filter selecting the document(s) to update.
        new_text: Mapping of field names to the values to set.
        multi: When true, update every matching document instead of only
            the first one.

    NOTE(review): assumes ``self.collection`` is a PyMongo 3.x+ ``Collection``
    -- confirm.
    """
    if query is None or new_text is None:
        # The defaults used to be the ``dict`` type object, which the driver
        # rejects; refuse explicitly instead of sending a meaningless call.
        logger.error("update needs both a query and the new field values")
        logger.error("upload failed")
        return

    # update_one/update_many replace the legacy Collection.update(multi=...).
    apply_update = self.collection.update_many if multi else self.collection.update_one
    try:
        apply_update(query, {"$set": new_text}, upsert=True)
    except Exception as e:
        logger.error(e)
        logger.error("upload failed")
    else:
        logger.info("upload success!")
def get_price(self):
    """Fetch the price fields for ``self.sku_id`` from the p.3.cn price endpoint.

    The request is attempted up to five times: an attempt that raises is
    followed by a 10-second pause, and the loop stops early once the response
    body contains ``"id"``.

    Returns:
        A ``(price, price_plus)`` tuple taken from the ``"p"`` and ``"tpp"``
        fields of the first element of the JSON response (``"tpp"`` is
        presumably the PLUS-member price -- confirm).  Either value is ``""``
        when it is unavailable, including when every request failed or the
        body is not valid JSON.
    """
    headers = {
        "Host": "p.3.cn",
        "cache-control": "max-age=0",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3409.2 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
    }
    params = (
        ("skuIds", f"J_{self.sku_id},J_"),
        ("type", "1"),
    )

    # ``None`` marks "no response obtained yet"; a "" placeholder would blow
    # up on ``.text`` below if every attempt raised.
    response = None
    for _ in range(5):
        try:
            response = requests.get(
                "https://p.3.cn/prices/mgets",
                headers=headers,
                params=params,
                proxies=self.proxy,
                timeout=15,
            )
        except Exception as e:
            logger.error(e)
            time.sleep(10)
            continue
        if "id" in response.text:
            break

    if response is None:
        logger.error("get price failed: no response after 5 attempts")
        return "", ""

    logger.info(response.text)
    try:
        res_json = json.loads(response.text)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass.
        logger.error(e)
        return "", ""

    try:
        first = res_json[0]
        price = first.get("p", "")
        price_plus = first.get("tpp", "")
    except (IndexError, KeyError, TypeError, AttributeError):
        # Empty list, or a payload whose first element is not a mapping.
        price = price_plus = ""
    return price, price_plus
def delete(self, query=None, multi=False):
    """Delete document(s) matching ``query`` from ``self.collection``.

    Exactly one outcome is logged per call.  Failures are logged rather than
    raised; the method always returns ``None``.

    Args:
        query: Filter selecting what to delete.  It is required: omitting it
            is treated as a failure rather than as "delete everything".
        multi: When true, delete every matching document; otherwise delete
            at most one, and report failure if nothing matched.

    NOTE(review): assumes ``self.collection`` is a PyMongo 3.x+ ``Collection``
    -- confirm.
    """
    if query is None:
        # The default used to be the ``dict`` type object, which the driver
        # rejects; refuse explicitly.
        logger.error("no query given to delete")
        logger.error("delete failed!")
        return

    try:
        if multi:
            # delete_many replaces the legacy Collection.remove.
            self.collection.delete_many(query)
            deleted = True
        else:
            # find_one_and_delete returns the removed document, or None when
            # nothing matched.
            deleted = bool(self.collection.find_one_and_delete(query))
    except Exception as e:
        logger.error(e)
        logger.error("delete failed!")
        return

    if deleted:
        logger.info("delete succees!")
    else:
        logger.error("delete failed!")
# l = List_spider("华为手机", 10)
# l.run()

# Read stored records from Mongo and crawl the detail data for each sku_id.
m = Mongo_db()
# Multi-document query (type 2) with an empty filter, capped at 1000 records.
res = m.search({}, 2, 1000)
count = 0
for count, i in enumerate(res, start=1):
    sku_id = i.get("sku_id", "")
    if sku_id:
        jd = JD_Spider(sku_id)
        jd.run()
# Number of records visited, including those skipped for a missing sku_id.
print(count)

while True:
    # Wait for the trigger time; this call passes 17:39:00.
    alarm(17, 39, 0)
    logger.info(f"it‘s time to run~now is {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}")
    # Data refresh step (currently disabled).
    logger.info(f'start run ~')
    # run()
    # time.sleep(10*60)
    # Run the price-drop analysis.
    get_deep_price_down()
    # Sleep for 10 hours before waiting on the alarm again.
    time.sleep(60 * 60 * 10)
def send_2_me(text, desp):
    """POST a notification with a title and body to ``SERVER_JUN_URL``.

    Args:
        text: Sent as the ``text`` form field.
        desp: Sent as the ``desp`` form field.

    Network errors, including a timeout, propagate to the caller.
    """
    # A timeout keeps a stalled notification endpoint from hanging the
    # caller forever.
    r = requests.post(SERVER_JUN_URL, data={"text": text, "desp": desp}, timeout=15)
    logger.info(r.status_code)
    # Report success only when the server actually accepted the request.
    if r.ok:
        logger.info("tell me success!")
    else:
        logger.error("tell me failed!")