def parse(id_unique):
    """Fetch every follower of one user id, queue new ids in Redis and store the records in MongoDB."""
    global db_id
    headers['Referer'] = "https://xueqiu.com/" + str(id_unique)
    try:
        mainPage = session.get(url.format(id_unique, 1, getRandom()), headers=headers)
    except Exception:
        logger.warning("id {} request failed.".format(id_unique))
        return  # without the first page we cannot know maxPage, so give up on this id
    maxPage = mainPage.json()["maxPage"]

    # the follower pages are fetched concurrently by a thread pool
    followers = getFollowers(id_unique, maxPage)  # list of follower dicts

    # mark every new follower id in Redis and keep only unseen ones for the database
    to_insert = []
    for follower in followers:
        uid = follower['id']
        if r_client.sismember("Inserted", uid):
            continue  # already stored earlier, skip it
        r_client.sadd("Inserted", uid)
        to_insert.append(follower)
        if not r_client.sismember("Got", uid):
            r_client.sadd("Queue", uid)  # queue the id so its own followers get crawled later

    # roll over to a new collection once the current one holds more than 10000 documents
    if Mdb["snowball" + str(db_id)].count_documents({}) > 10000:
        db_id = db_id + 1
    if to_insert:  # insert_many() raises on an empty list
        Mdb["snowball" + str(db_id)].insert_many(to_insert)
    logger.info("Inserted {} followers of {} successfully.".format(len(to_insert), id_unique))
    r_client.sadd("Got", id_unique)
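# ---------------------------------------------------------------------------
# parse() and do_request() rely on several module-level objects that are not
# shown in this section: session, headers, url, getRandom, r_client, Mdb,
# db_id and logger. The sketch below is one plausible way to set them up (it
# would normally sit at the top of the module); the URL template, header
# values and Redis/MongoDB connection settings here are assumptions, not the
# original configuration.
import logging
import random
import requests
import redis
import pymongo

logger = logging.getLogger(__name__)
session = requests.Session()
headers = {'User-Agent': 'Mozilla/5.0'}  # assumed; the real crawler headers are not shown
# assumed follower-list endpoint, formatted with (user id, page number, random token)
url = "https://xueqiu.com/friendships/followers.json?uid={}&pageNo={}&_={}"
r_client = redis.StrictRedis(host='localhost', port=6379, db=0)  # assumed Redis connection
Mdb = pymongo.MongoClient('localhost', 27017)["snowball"]        # assumed MongoDB database name
db_id = 0


def getRandom():
    # assumed helper: a throwaway cache-busting token appended to each request
    return random.random()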
def do_request(id_unique, page, followers):
    """Fetch one page of followers for id_unique and append the returned users to the shared list.

    All network requests for the follower pages go through this function (it runs inside the thread pool).
    """
    headers['Referer'] = "https://xueqiu.com/" + str(id_unique)
    try:
        paging = session.get(url.format(id_unique, page, getRandom()), headers=headers)
        followers.extend(paging.json()["users"])
    except Exception:  # any network or parsing error just logs and skips this page
        logger.warning("id {} page {} request failed.".format(id_unique, page))
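# parse() also calls getFollowers(), which is not shown in this section. A
# minimal sketch, assuming it simply fans do_request() out over a thread pool
# with one task per page and returns the combined follower list (the worker
# count is an arbitrary choice, not the original value):
from concurrent.futures import ThreadPoolExecutor


def getFollowers(id_unique, maxPage, workers=8):
    followers = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        for page in range(1, maxPage + 1):
            pool.submit(do_request, id_unique, page, followers)
        # leaving the with-block waits for every submitted task to finish
    return followers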
def _get_page_index(self, from_station_no, to_station_no):
    """Query 12306 for remaining tickets between two station codes and yield each result row."""
    url = 'https://kyfw.12306.cn/otn/leftTicket/query?'
    data = {
        'leftTicketDTO.train_date': self.train_date,
        'leftTicketDTO.from_station': from_station_no,
        'leftTicketDTO.to_station': to_station_no,
        'purpose_codes': 'ADULT'
    }
    url = url + urlencode(data)
    response = session.get(url, headers=headers)
    if response.status_code == 200:
        data = json.loads(response.text)
        if data and 'data' in data:
            for item in data['data']['result']:
                yield item
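# _get_page_index is a method of a query class that is not shown here; it also
# expects urlencode, json, session and headers at module level
# (urllib.parse.urlencode, the json module and a shared requests session). A
# minimal way to exercise it under those assumptions, with an illustrative
# travel date and station telecodes (BJP = Beijing, SHH = Shanghai):
from types import SimpleNamespace

query = SimpleNamespace(train_date='2019-10-01')  # stand-in for the real query object
for row in _get_page_index(query, 'BJP', 'SHH'):
    print(row)  # each row is one '|'-separated ticket record from the 12306 response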