def crawl_from_zdaye(self):
    # fetch proxies from the zdaye API (query string carries GBK-encoded Chinese parameters)
    url = "http://api.zdaye.com/?api=201607090751537556&sleep=5%C3%EB%C4%DA&gb=2&post=%D6%A7%B3%D6&ct=200"
    proxy_lst = set()
    r = requests.get(url, timeout=6)
    proxy_lst = proxy_lst.union(
        set(re.findall(r"(\d+\.\d+\.\d+\.\d+:\d+)", r.content)))
    add_cnt = 0
    for ipstr in proxy_lst:
        if self.valid_proxy(ipstr):
            rebot_log.info("[crawl_from_zdaye] %s", ipstr)
            self.add_proxy(ipstr)
            add_cnt += 1
    return add_cnt
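# Note: the crawl_from_* methods in this section all rely on two helpers on
# their class, valid_proxy(ipstr) and add_proxy(ipstr), neither of which is
# shown here. The sketch below is a hypothetical minimal version for context
# only; the real implementations (probe URL, timeout, storage backend) may
# differ.
def valid_proxy(self, ipstr):
    # reject anything that is not "ip:port", then probe a well-known page
    # through the proxy; any network error means the proxy is unusable
    if not re.match(r"^\d+\.\d+\.\d+\.\d+:\d+$", ipstr):
        return False
    proxies = {"http": "http://%s" % ipstr}
    try:
        r = requests.get("http://www.baidu.com", proxies=proxies, timeout=5)
        return r.status_code == 200
    except Exception:
        return False

def add_proxy(self, ipstr):
    # hypothetical storage: keep verified proxies in an in-memory set
    self.proxy_pool = getattr(self, "proxy_pool", set())
    self.proxy_pool.add(ipstr)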
def send_order_request(self, order):
    rebot = order.get_lock_rebot()
    sn = order.pay_order_no
    url = 'http://order.xuyunjt.com/wsdgalipayddcx.aspx'
    headers = {
        "User-Agent": rebot.user_agent,
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # GET the page first to pick up the ASP.NET hidden form fields
    r = requests.get(url, headers=headers)
    soup = bs(r.content, 'lxml')
    state = soup.find(
        'input', attrs={'name': '__VIEWSTATE', 'id': '__VIEWSTATE'}).get('value', '')
    validation = soup.find(
        'input', attrs={'name': '__EVENTVALIDATION', 'id': '__EVENTVALIDATION'}).get('value', '')
    data = {
        '__EVENTTARGET': '',
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
        '__VIEWSTATE': state,
        '__EVENTVALIDATION': validation,
        'ctl00$ContentPlaceHolder1$txtbespoke_id': order.raw_order_no,
        'ctl00$ContentPlaceHolder1$txtcardno': order.contact_info.get('id_number'),
        'ctl00$ContentPlaceHolder1$ddlcond': '全部',
        'ctl00$ContentPlaceHolder1$ImageButton1.x': '23',
        'ctl00$ContentPlaceHolder1$ImageButton1.y': '18',
    }
    # POST the query and look up this order in the result grid
    r = requests.post(url, headers=headers,
                      cookies=r.cookies, data=urllib.urlencode(data))
    soup = bs(r.content, 'lxml')
    try:
        info = soup.find(
            'table', attrs={'id': 'ctl00_ContentPlaceHolder1_GridView1'}).find_all(
            'tr', attrs={'class': 'GridViewRowStyle'})
        for x in info:
            sn1 = x.find('input', attrs={
                'id': 'ctl00_ContentPlaceHolder1_GridView1_ctl02_hdfbespoke_id'}).get('value', '').strip()
            rebot_log.info(sn1)
            if sn == sn1:
                state = x.find('span', attrs={
                    'id': 'ctl00_ContentPlaceHolder1_GridView1_ctl02_lblticketstatus'}).get_text()
                pcode = x.find('span', attrs={
                    'id': 'ctl00_ContentPlaceHolder1_GridView1_ctl02_lblget_ticket_passwd'}).get_text().strip()
                return {
                    "state": state,
                    "pick_no": pcode,
                    "pick_code": pcode,
                    "pick_site": '',
                    'raw_order': sn,
                    "pay_money": 0.0,
                }
    except Exception:
        pass
def crawl_from_kuaidaili(self):
    add_cnt = 0
    url = "http://dev.kuaidaili.com/api/getproxy/?orderid=938100440311431&num=200&area=%E4%B8%AD%E5%9B%BD&b_pcchrome=1&b_pcie=1&b_pcff=1&protocol=1&method=2&an_an=1&an_ha=1&sep=2"
    try:
        r = requests.get(url, timeout=10)
    except Exception:
        return add_cnt
    # the API returns one "ip:port" per line
    for s in r.content.split("\n"):
        ipstr = s.strip()
        if self.valid_proxy(ipstr):
            rebot_log.info("[crawl_from_kuaidaili] %s", ipstr)
            self.add_proxy(ipstr)
            add_cnt += 1
    return add_cnt
def crawl_from_samair(self):
    add_cnt = 0
    from selenium import webdriver
    for i in range(1, 10):
        url = "http://www.samair.ru/proxy-by-country/China-%02d.htm" % i
        driver = webdriver.PhantomJS()
        driver.get(url)
        for trobj in driver.find_elements_by_tag_name("tr"):
            lst = re.findall(r"(\d+\.\d+\.\d+\.\d+:\d+)", trobj.text)
            for ipstr in lst:
                if self.valid_proxy(ipstr):
                    rebot_log.info("[crawl_from_samair] %s", ipstr)
                    self.add_proxy(ipstr)
                    add_cnt += 1
        # close the PhantomJS process before starting the next page
        driver.quit()
    return add_cnt
def crawl_from_mimiip(self):
    add_cnt = 0
    for i in range(1, 9):
        url = "http://www.mimiip.com/gngao/%s" % i
        try:
            r = requests.get(url, timeout=10)
        except Exception:
            continue
        soup = BeautifulSoup(r.content, "lxml")
        for s in soup.select("table tr")[1:]:
            td_lst = s.findAll("td")
            ip, port = td_lst[0].text.strip(), td_lst[1].text.strip()
            ipstr = "%s:%s" % (ip, port)
            if self.valid_proxy(ipstr):
                rebot_log.info("[crawl_from_mimiip] %s", ipstr)
                self.add_proxy(ipstr)
                add_cnt += 1
    return add_cnt
def crawl_from_kxdaili(self):
    url_tpl = "http://www.kxdaili.com/dailiip/%s/%s.html"
    add_cnt = 0
    for i in [1, 2]:
        for j in range(1, 6):
            url = url_tpl % (i, j)
            try:
                r = requests.get(url, timeout=10, headers={"User-Agent": "Chrome"})
            except Exception:
                continue
            soup = BeautifulSoup(r.content, "lxml")
            for s in soup.select(".table tr")[1:]:
                td_lst = s.findAll("td")
                ip, port = td_lst[0].text.strip(), td_lst[1].text.strip()
                ipstr = "%s:%s" % (ip, port)
                if self.valid_proxy(ipstr):
                    rebot_log.info("[crawl_from_kxdaili] %s", ipstr)
                    self.add_proxy(ipstr)
                    add_cnt += 1
    return add_cnt
def do_refresh_issue(self, order):
    result_info = {
        "result_code": 0,
        "result_msg": "",
        "pick_code_list": [],
        "pick_msg_list": [],
    }
    if not self.need_refresh_issue(order):
        result_info.update(result_msg="状态未变化")
        return result_info
    ret = self.send_order_request(order)
    state = ret['state']
    pcode = ret['pick_code']
    rebot_log.info(ret)
    rebot_log.info(state)
    if '失败' in state:
        result_info.update({
            "result_code": 2,
            "result_msg": state,
        })
    elif '交易成功' in state and pcode:
        dx_info = {
            "time": order.drv_datetime.strftime("%Y-%m-%d %H:%M"),
            "start": order.line.s_sta_name,
            "end": order.line.d_sta_name,
            "pcode": pcode,
            'person': '取票人',
        }
        dx_tmpl = DUAN_XIN_TEMPL[SOURCE_ZHW]
        code_list = ["%s" % pcode]
        msg_list = [dx_tmpl % dx_info]
        result_info.update({
            "result_code": 1,
            "result_msg": state,
            "pick_code_list": code_list,
            "pick_msg_list": msg_list,
        })
    # rebot_log.info(result_info)
    return result_info
def crawl_from_ip181(self):
    url_tpl = "http://www.ip181.com/daili/%s.html"
    add_cnt = 0
    for i in range(1, 6):
        url = url_tpl % i
        try:
            r = requests.get(url, timeout=10, headers={"User-Agent": "Chrome"})
        except Exception:
            continue
        soup = BeautifulSoup(r.content, "lxml")
        for s in soup.select(".table tr")[1:]:
            td_lst = s.findAll("td")
            ip, port = td_lst[0].text.strip(), td_lst[1].text.strip()
            # skip proxies whose listed response time exceeds 1 second
            speed = float(td_lst[4].text.rstrip("秒").strip())
            if speed > 1:
                continue
            ipstr = "%s:%s" % (ip, port)
            if self.valid_proxy(ipstr):
                rebot_log.info("[crawl_from_ip181] %s", ipstr)
                self.add_proxy(ipstr)
                add_cnt += 1
    return add_cnt
def get_pay_page(self, order, valid_code="", session=None, pay_channel="alipay", **kwargs):
    rebot = order.get_lock_rebot()

    # fetch the alipay payment page
    def _get_page(rebot):
        if order.status == STATUS_WAITING_ISSUE:
            pay_url = "http://www.hn96520.com/pay.aspx"
            headers = {
                "User-Agent": rebot.user_agent,
                "Content-Type": "application/x-www-form-urlencoded",
            }
            params = {
                'o': '0',
                "h_code": order.raw_order_no,
                "paymentType": 202,  # alipay channel
            }
            cookies = json.loads(rebot.cookies)
            r = requests.post(pay_url, data=urllib.urlencode(params),
                              headers=headers, cookies=cookies)
            data = self.extract_alipay(r.content)
            pay_money = float(data["total_fee"])
            trade_no = data["out_trade_no"]
            if order.pay_money != pay_money or order.pay_order_no != trade_no:
                order.modify(pay_money=pay_money, pay_order_no=trade_no)
            return {"flag": "html", "content": r.content}

    # no login captcha given: log in with saved headers/cookies if available
    if not valid_code:
        try:
            info = json.loads(session["pay_login_info"])
            headers = info["headers"]
            cookies = info["cookies"]
        except Exception:
            ua = random.choice(BROWSER_USER_AGENT)
            headers = {"User-Agent": ua}
            cookies = {}
        params = {
            "UserName": rebot.telephone,
            'uname': rebot.telephone,
            "Password": rebot.password,
            'RememberMe': 'true',
            'backurl': '/qiche/main.aspx',
            'next': '',
            # "vcode": valid_code,
        }
        rebot_log.info(params)
        custom_headers = {}
        custom_headers.update(headers)
        r = requests.post(
            "http://www.chechuw.com/login?backurl=/qiche/main.aspx",
            data=params,
            headers=custom_headers,
            allow_redirects=False,
            cookies=cookies)
        cookies.update(dict(r.cookies))
        rebot.modify(cookies=json.dumps(cookies))
    is_login = rebot.test_login_status()
    if is_login:
        # lock the ticket first if it has not been locked yet
        if order.status in [STATUS_LOCK_RETRY, STATUS_WAITING_LOCK]:
            self.lock_ticket(order)
        # rebot.clear_riders()
        # rebot.is_locked = False
        # rebot.save()
        return _get_page(rebot)
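# get_pay_page relies on self.extract_alipay(html), which is not part of this
# section. A hypothetical sketch, assuming the pay page embeds the alipay
# gateway parameters as hidden <input> fields (field names such as "total_fee"
# and "out_trade_no" are the standard alipay parameter names; the real parser
# may extract them differently, e.g. from a redirect URL):
def extract_alipay(self, content):
    soup = bs(content, "lxml")
    data = {}
    for node in soup.find_all("input"):
        name = node.get("name")
        if name:
            data[name] = node.get("value", "")
    return data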
def do_lock_ticket(self, order):
    lock_result = {
        "lock_info": {},
        "source_account": order.source_account,
        "result_code": 0,
        "result_reason": "",
        "pay_url": "",
        "raw_order_no": "",
        "expire_datetime": "",
        "pay_money": 0,
    }
    rebot = order.get_lock_rebot()
    riders = rebot.add_riders(order)
    takeman = ''
    for rider in riders.values():
        takeman += ',' + str(rider)
    line = order.line
    param = {
        'CarNum': line.extra_info.get('carNum', ''),
        'bcbm': line.extra_info.get('bcbm', ''),
        'ccid': line.extra_info.get('ccid', ''),
        'cid': line.extra_info.get('cid', ''),
        'clx': line.extra_info.get('clx', ''),
        'cmd': line.extra_info.get('cmd', ''),
        'cmdid': line.extra_info.get('cmdid', ''),
        'cna': line.extra_info.get('cna', ''),
        'cnum': line.extra_info.get('cnum', ''),
        'crq': line.extra_info.get('crq', ''),
        'csj': line.extra_info.get('csj', ''),
    }
    rebot_log.info(param)
    url = 'http://www.hn96520.com/putin.aspx?' + urllib.urlencode(param)
    cookies = json.loads(rebot.cookies)
    headers = {'User-Agent': rebot.user_agent}
    # place the order: add passengers and book the chosen schedule
    r = requests.get(url, headers=headers, cookies=cookies)
    soup = bs(r.content, 'lxml')
    try:
        title = soup.title.get_text()
        info = soup.find('table', attrs={
            'class': 'tblp shadow',
            'cellspacing': True,
            'cellpadding': True,
        }).find_all('tr')
        pay_money = info[-1].find_all('td')[-1].get_text()
        pay_money = float(re.search(r'\d+', pay_money).group(0))
        raw_order_no = soup.find('input', attrs={
            'id': 'txt_CopyLink'
        }).get('value').split('=')[-1]
        if '准备付款' in title:
            # order placed; it must be paid within 15 minutes
            expire_time = dte.now() + datetime.timedelta(seconds=15 * 60)
            lock_result.update({
                'result_code': 1,
                'raw_order_no': raw_order_no,
                "expire_datetime": expire_time,
                "source_account": rebot.telephone,
                'pay_money': pay_money,
            })
            # rebot_log.info(lock_result)
        else:
            errmsg = title
            lock_result.update({
                'result_code': 0,
                "result_reason": errmsg,
                "source_account": rebot.telephone,
                'pay_money': 0,
            })
            # remove the passengers added for this attempt
            rebot.clear_riders(riders)
        return lock_result
    except Exception:
        rebot.clear_riders(riders)