Example #1
0
def parse(id_unique):   #process id
    """Collect the followers of ``id_unique`` and record them in redis and the db.

    Requests page 1 to read ``maxPage``, gathers the follower dicts through
    ``getFollowers``, then for every follower:

    * adds its id to the redis set ``"Inserted"``; followers whose id was
      already in that set are left out of the database insert;
    * adds its id to the redis set ``"Queue"`` unless it is already in ``"Got"``.

    The remaining follower dicts are bulk-inserted into the collection
    ``"snowball" + str(db_id)``; the global ``db_id`` is incremented first
    when that collection's ``count()`` exceeds 10000.  Finally ``id_unique``
    is added to ``"Got"``.

    If the first request raises, a warning is logged and the function returns
    without touching redis or the database, so ``id_unique`` is not added to
    ``"Got"``.

    :param id_unique: user id; interpolated into the Referer header and the
        request URL.
    :return: None
    """
    global db_id
    headers['Referer'] = "https://xueqiu.com/" + str(id_unique)
    try:
        mainPage = session.get(url.format(id_unique, 1, getRandom()), headers=headers)
    except Exception:
        logger.warning("id {} request failed.".format(id_unique))
        # Without a response there is no maxPage to read; continuing would
        # only fail on the unbound ``mainPage``.
        return
    maxPage = mainPage.json()["maxPage"]

    # The remaining pages are fetched by getFollowers (the original note says
    # this goes through a threading pool).
    followers = getFollowers(id_unique, maxPage)  # list of follower dicts

    # Build a separate list rather than removing from ``followers`` during
    # iteration: removing in place makes the loop skip the element that
    # follows each removed one.
    new_followers = []
    for follower in followers:
        uid = follower['id']
        if not r_client.sismember("Inserted", uid):
            r_client.sadd("Inserted", uid)
            new_followers.append(follower)

        if not r_client.sismember("Got", uid):
            r_client.sadd("Queue", uid)

    if Mdb["snowball" + str(db_id)].count() > 10000:
        db_id = db_id + 1
    # NOTE(review): assuming Mdb is a pymongo database, insert_many rejects an
    # empty document list, so skip the call when nothing is new - confirm.
    if new_followers:
        Mdb["snowball" + str(db_id)].insert_many(new_followers)
    logger.info("Your insert {} 's {} followers successfully.".format(id_unique, len(new_followers)))
    r_client.sadd("Got", id_unique)
Example #2
0
def do_request(id_unique, page, followers):  #process page info   **all network request**
    """Fetch one page for ``id_unique`` and append its users to ``followers``.

    Sets the shared ``headers['Referer']`` for this id, requests the URL built
    from ``id_unique``, ``page`` and ``getRandom()``, and extends ``followers``
    in place with the ``"users"`` entry of the JSON response.

    Any ordinary failure (request error, non-JSON body, missing ``"users"``
    key) is logged as a warning and otherwise ignored, leaving ``followers``
    without this page's entries.

    :param id_unique: user id used in the Referer header and the request URL.
    :param page: page number to request.
    :param followers: list that receives the users of this page (mutated).
    :return: None
    """
    # NOTE(review): ``headers`` is module-level state; if this function runs
    # from several threads with different ids the Referer may be overwritten
    # between assignment and request - confirm whether that matters.
    headers['Referer'] = "https://xueqiu.com/" + str(id_unique)
    try:
        paging = session.get(url.format(id_unique, page, getRandom()), headers=headers)
        followers.extend(paging.json()["users"])
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        logger.warning("{} request failed".format(id_unique))
Example #3
0
 def _get_page_index(self, from_station_no, to_station_no):
     """Yield the entries of the left-ticket query result for a station pair.

     Builds the query string from ``self.train_date``, the two station
     arguments and the fixed purpose code ``ADULT``, then requests it with
     the module-level ``session`` and ``headers``.  Nothing is yielded when
     the status code is not 200, when the decoded JSON is empty, or when it
     has no ``'data'`` key.

     :param from_station_no: value sent as ``leftTicketDTO.from_station``.
     :param to_station_no: value sent as ``leftTicketDTO.to_station``.
     :return: generator over ``data['data']['result']`` of the response.
     """
     query_string = urlencode({
         'leftTicketDTO.train_date': self.train_date,
         'leftTicketDTO.from_station': from_station_no,
         'leftTicketDTO.to_station': to_station_no,
         'purpose_codes': 'ADULT'
     })
     response = session.get(
         'https://kyfw.12306.cn/otn/leftTicket/query?' + query_string,
         headers=headers,
     )
     # Guard clauses: stop early on anything but a usable 200 response.
     if response.status_code != 200:
         return
     payload = json.loads(response.text)
     if not payload or 'data' not in payload.keys():
         return
     for entry in payload['data']['result']:
         yield entry