def __init__(self, task, appcfg, clientid):
    super(SpiderSuning, self).__init__(task, appcfg, clientid)
    self._ha = HttpAccess()
    self.userid = ""
    self.time = datetime.datetime.now(
        pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
    if self.task.cookie:
        self._ha._managedCookie.add_cookies("suning.com", self.task.cookie)
class TwitterSpider(object):

    def __init__(self):
        self._ha = HttpAccess()
        cookie = '''personalization_id="v1_pmt2sntu/a8PCORtco8eVg=="; guest_id=v1%3A156194413511829253; ct0=6bb1b8031784f711388377a485cd5bf9; _ga=GA1.2.1901222848.1561944140; _gid=GA1.2.214369962.1561944140; ads_prefs="HBERAAA="; kdt=l6Dkc64O0CPl4qCVtYAuXjXrtxkII2VUjRNqMOfT; remember_checked_on=1; _twitter_sess=BAh7CiIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCGsaIKtrAToMY3NyZl9p%250AZCIlNzIyZDlhODdlMTVjYjU3MTRkYTBlY2Y4NGQ5MDQzMjQ6B2lkIiVjZTgw%250ANDQ0ZmIyOTAyY2U0MjQ0NjI4ZTFmNjU0MjgwOToJdXNlcmwrCQHglUis2NwN--8ae08c231e9599f1c5868e6518664b987d936c79; twid="u=998911451833163777"; auth_token=3378bb7bfd90d8bfb9de00b7fb7110633a256852; csrf_same_site_set=1; lang=en; csrf_same_site=1; _gat=1'''
        self._ha._managedCookie.add_cookies('twitter.com', cookie)

    def get_first_page_info(self):
        """
        Get the information on the first page.
        :return:
        """
        url = 'https://twitter.com/Google/followers'
        headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache
        pragma: no-cache
        referer: https://twitter.com/login
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
        '''
        restring = self._ha.getstring(url, headers=headers)
        # print(restring)
        soup = BeautifulSoup(restring, 'lxml')
        all_divs = soup.find_all(
            'div',
            attrs={
                'class': 'user-actions btn-group not-following not-muting '
            })
        pass

    def search(self):
        url = "https://twitter.com/GEMoving"
        headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache
        cookie: guest_id=v1%3A155921858819307425; _ga=GA1.2.999014813.1559218592; tfw_exp=0; kdt=0XTnTydp2g3vpbnSflAWv3kKb1zxBLeoftN3fQgd; remember_checked_on=0; csrf_same_site_set=1; csrf_same_site=1; personalization_id="v1_bHstwWEgjYsaq0IVqrj60Q=="; external_referer=padhuUp37zjgzgv1mFWxJ12Ozwit7owX|0|8e8t2xd8A2w%3D; ads_prefs="HBERAAA="; ct0=4fc617a6fb32b387a3d51da28a37910f; _gid=GA1.2.84942800.1562234004; twid="u=998911451833163777"; auth_token=f75059e44a60f7e68d723ded8571c6938c50c23b; _twitter_sess=BAh7CiIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCKsZZ7xrAToMY3NyZl9p%250AZCIlNjAzZjE0NTU0ZjM1NWZlNjZhMmU1YWIyZDEzMzUxOTg6B2lkIiUxODU2%250AZWRhNWM0NjNhNzAyODM3YjJiMTFkNjlmOGY3NzoJdXNlcmwrCQHglUis2NwN--ba48cd0cf7d091fda283e2eb1f9e75fb2d4cd73c; lang=en; _gat=1
        pragma: no-cache
        referer: https://twitter.com/search?q=%E9%82%93%E7%B4%AB%E6%A3%8B&src=typd
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
        '''
        restring = self._ha.getstring(url, headers=headers)
        soup = BeautifulSoup(restring, 'lxml')
        all_divs = soup.find_all(
            'div',
            attrs={
                'class': 'user-actions btn-group not-following not-muting '
            })
        pass
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self._host: str = '.instagram.com'
    self._login()
    if Instagram._cookie:
        self._ha._managedCookie.add_cookies(self._host, Instagram._cookie)
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self._host: str = '.linkedin.com'
    self._login()
    if LinkedIn._cookie:
        for key, value in LinkedIn._cookie.items():
            self._ha._managedCookie.add_cookies(key, value)
    self._first_page()
class TestCaseHttpAccess(unittest.TestCase):
    """"""

    def __init__(self, methodName="runTest"):
        unittest.TestCase.__init__(self, methodName=methodName)
        warnings.simplefilter('ignore', ResourceWarning)
        self._ha: HttpAccess = None

    @classmethod
    def setUpClass(cls):
        # print('Previous condition for all')
        pass

    @classmethod
    def tearDownClass(cls):
        # print('Post condition for all')
        pass

    def setUp(self):
        # print('Previous condition for each')
        if not isinstance(self._ha, HttpAccess):
            self._ha = HttpAccess()

    def tearDown(self):
        # print('Post condition for each')
        pass

    # Test case names must start with "test", otherwise they are not executed.
    @unittest.skip('')
    def test_skip(self):
        pass

    def test_instruct(self):
        self.assertIsNotNone(self._ha)

    def test_getstring(self):
        self.assertIsNotNone(self._ha)
        url = 'https://www.baidu.com'
        html = self._ha.getstring(url,
                                  headers='''
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: www.baidu.com
        Pragma: no-cache
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36''')
        self.assertFalse(html is None or html == '')
def __init__(self,
             task: Task,
             appcfg: AppCfg,
             clientid: str,
             logger_name_ext: str = ""):
    if not isinstance(task, Task):
        raise Exception("Task is invalid.")
    if not isinstance(appcfg, AppCfg):
        raise Exception("AppConfig is invalid.")
    if not isinstance(clientid, str) or clientid == "":
        raise Exception("Invalid clientid")

    self.task = task
    self._clientid: str = clientid
    self._appcfg = appcfg

    # Logger and plugin name
    self._name = type(self).__name__
    loggername = f"{self._name}_{self.task.batchid}"
    if logger_name_ext is not None and logger_name_ext != "":
        loggername += "_{}".format(logger_name_ext)
    self._logger: MsLogger = MsLogManager.get_logger(loggername)

    # HTTP access object
    self._ha: HttpAccess = HttpAccess()

    # Common fields holding the logged-in account's information,
    # needed all over the plugin
    self._userid: str = None  # the site's unique identifier for the user
    self._account: str = self.task.account  # account name usable for login
    self._username: str = None  # user nickname
    self._globaltelcode: str = self.task.globaltelcode  # international dialing code
    self._phone: str = self.task.phone  # phone number
    self._url: str = self.task.url
    self._host: str = self.task.host
    self._cookie: str = self.task.cookie

    # State objects
    self._errorcount: int = 0
    self.is_running: bool = False
    self.running_task = []

    # Verification codes are treated as valid for 900 seconds; 15 minutes
    # is plenty, since most sites cap code validity at about 10 minutes.
    self._effective_time = 900

    # self._outputtgfile = OutputManage()
    self._sqlfunc = DbManager

    # Thread-running flag
    self._running = True
    # Stop flag; defaults to not stopped (False = keep downloading, True = stop)
    self._stop_sign = False
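A minimal sketch of how a concrete spider typically builds on this base __init__, mirroring the subclass pattern used throughout this section; SpiderExampleBase and the 'example.com' domain are hypothetical placeholders, and the framework is assumed to supply task, appcfg and clientid.

class SpiderExample(SpiderExampleBase):  # hypothetical subclass for illustration

    def __init__(self, task, appcfg, clientid):
        # The base __init__ validates task/appcfg/clientid, creates the
        # logger and the shared HttpAccess instance, and copies the common
        # account fields (_account, _phone, _cookie, ...).
        super(SpiderExample, self).__init__(task, appcfg, clientid)
        # Cookies are registered per domain on the managed cookie jar,
        # exactly as the real subclasses in this section do.
        if self._cookie:
            self._ha._managedCookie.add_cookies('example.com', self._cookie)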
def __init__(self, task: IscoutTask):
    ScoutPlugBase.__init__(self)
    self.task = task
    self._ha: HttpAccess = HttpAccess()
    self.basic_url = "https://www.instagram.com/"
    self.headers = """
    accept: */*
    accept-encoding: gzip, deflate, br
    accept-language: zh-CN,zh;q=0.9
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36
    """
    # Search-box API
    self.searchBox_api = 'https://www.instagram.com/web/search/topsearch/?context=blended&query={}&include_reel=true'
    # User-data API
    self.userData_api = 'https://www.instagram.com/{}/?__a=1'
    self.source = "instagram"
    self.reason = "instagram identity attribution"
class SpiderTongCheng(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTongCheng, self).__init__(task, appcfg, clientid)
        self.cookie = self.task.cookie
        self.ha = HttpAccess()
        if self.cookie:
            self.ha._managedCookie.add_cookies('ly.com', self.cookie)

    def _cookie_login(self):
        url = 'https://member.ly.com/information'
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: member.ly.com
        Pragma: no-cache
        Referer: https://member.ly.com/order
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(url, headers=headers).replace('\r', '').replace(
            '\n', '').replace('\t', '')
        html = re.sub(r'\s{2,}', '', html)
        soup = BeautifulSoup(html, 'lxml')
        try:
            phone = soup.select_one("#tel").get_text('-')
            phone = phone.split('-')[1]
            userid = substring(self.cookie, 'userid=', '&')
            if userid:
                self.userid = userid + '-tongcheng'
                return True
            elif phone:
                self.userid = phone + '-tongcheng'
                return True
            else:
                return False
        except:
            return False

    def _get_profile(self):
        try:
            url = 'https://member.ly.com/information'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: member.ly.com
            Pragma: no-cache
            Referer: https://member.ly.com/order
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace(
                '\r', '').replace('\n', '').replace('\t', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            detail = {}
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            try:
                res.nickname = soup.select_one('#txtsmallName')['value']
            except:
                pass
            try:
                detail['realname'] = soup.select_one('#txtName')['value']
            except:
                pass
            try:
                email = soup.select_one("#email").get_text('-')
                res.email = email.split('-')[1]
            except:
                pass
            try:
                phone = soup.select_one("#tel").get_text('-')
                res.phone = phone.split('-')[1]
            except:
                pass
            try:
                text = substring(html, 'class="sex1"', 'checked="checked"')
                sexnum = re.findall(r'name="sex"', text)
                if len(sexnum) == 1:
                    res.gender = EGender.Male
                elif len(sexnum) == 2:
                    res.gender = EGender.Female
            except:
                pass
            try:
                detail['profession'] = soup.select_one('#ddlZhiye').get_text()
            except:
                pass
            try:
                res.birthday = soup.select_one('#hfYear')['value'] + '-' + \
                    soup.select_one('#hfMonth')['value'] + '-' + \
                    soup.select_one('#hfDay')['value']
            except:
                pass
            try:
                detail['QQ'] = soup.select_one('#txtQQ')['value']
            except:
                pass
            res.append_details(detail)
            photourl = soup.select_one('#contentHead img.png')['src']
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            page = 0
            while True:
                page += 1
                url = 'https://member.ly.com/orderajax/default?OrderFilter=0&DateType=0&PageIndex={}'.format(
                    page)
                headers = """
                Accept: */*
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: member.ly.com
                Pragma: no-cache
                Referer: https://member.ly.com/order
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
                X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                OrderDetailList = jshtml['ReturnValue']['OrderDetailList']
                if OrderDetailList:
                    for OrderDetail in OrderDetailList:
                        try:
                            orderid = OrderDetail['OrderId']
                            order = {}
                            order['title'] = OrderDetail['ProductName']
                            order['FirstDesc'] = OrderDetail['FirstDesc']
                            order['price'] = OrderDetail['ProductPrice']
                            order['status'] = OrderDetail['ChieseOrderStatus']
                            order['SerialId'] = OrderDetail['SerialId']
                            order['ExtendData'] = OrderDetail['ExtendData']
                            OrderDetailUrl = OrderDetail['OrderDetailUrl']
                            ordertime, detail = self._order_detail(
                                OrderDetailUrl)
                            if detail:
                                order['detail'] = detail
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.ly.com'
                            yield res_one
                        except:
                            pass
                OrderListCount = jshtml['ReturnValue']['OrderListCount']
                if OrderListCount <= 10 * page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        orderurl = 'https:' + orderurl
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: member.ly.com
        Pragma: no-cache
        Referer: https://member.ly.com/order
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        html = self.ha.getstring(orderurl, headers=headers).replace('\n', '').replace(
            '\t', '')
        soup = BeautifulSoup(html, 'lxml')
        # Train tickets are all JS-encrypted; not captured for now.
        dic = {}
        ordertime = ''
        # Scenic-spot tickets
        if re.findall(r'Scenery', orderurl):
            ordertime = substring(html, '创建时间:', ' <')
            dic['Contacts'] = soup.select_one('.infor_box table').get_text(' ')
        # Flights
        elif re.findall(r'Flight', orderurl):
            ordertime = soup.select_one('.orderTime span').get_text('')
            dic['Passenger'] = soup.select_one('.no_bottom.infoLine').get_text(' ')
            dic['Contacts'] = soup.select_one('.contactPerson').get_text(' ')
        # Hotels
        elif re.findall(r'hotel', orderurl):
            ordertime = soup.select_one('.time-point').get_text(' ')
            dic['checkinPerson'] = soup.select_one(
                '.checkin-info.part').get_text(' ')
        return ordertime, dic
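The substring() helper used throughout these spiders is not defined in this section. Judging from call sites such as substring(self.cookie, 'userid=', '&'), it plausibly returns the text between the first occurrence of the start marker and the next occurrence of the end marker; a minimal sketch under that assumption:

def substring(text: str, start: str, end: str):
    """Assumed semantics: the text between the first `start` and the
    following `end`; returns None when either marker is missing, which
    matches the `if userid:`-style guards at the call sites."""
    if not text:
        return None
    begin = text.find(start)
    if begin < 0:
        return None
    begin += len(start)
    stop = text.find(end, begin)
    if stop < 0:
        return None
    return text[begin:stop]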
class SpiderTuniu(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTuniu, self).__init__(task, appcfg, clientid)
        self.cookie = self.task.cookie
        self.ha = HttpAccess()
        if self.cookie:
            self.ha._managedCookie.add_cookies('tuniu.com', self.cookie)

    def _check_registration(self):
        """
        Check whether the phone number is registered with Tuniu.
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            url = "https://passport.tuniu.com/register"
            html = self._ha.getstring(url,
                                      headers="""
            accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            accept-encoding: gzip, deflate, br
            accept-language: zh-CN,zh;q=0.9
            cache-control: no-cache
            pragma: no-cache
            upgrade-insecure-requests: 1
            user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36""")
            headers = """
            Accept: */*
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Origin: https://passport.tuniu.com
            Referer: https://passport.tuniu.com/register
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            X-Requested-With: XMLHttpRequest"""
            url = 'https://passport.tuniu.com/register/isPhoneAvailable'
            postdata = f"intlCode=0086&tel={self.task.phone}"
            html = self._ha.getstring(url, headers=headers, req_data=postdata)
            if '"errno":-1,' in html:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)
        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
        headers = """
        Accept: application/json, text/javascript, */*; q=0.01
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Content-Length: 76
        Content-Type: application/x-www-form-urlencoded; charset=UTF-8
        Host: i.tuniu.com
        Origin: https://i.tuniu.com
        Pragma: no-cache
        Referer: https://i.tuniu.com/userinfoconfirm
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
        X-Requested-With: XMLHttpRequest"""
        postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
        try:
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            userid = jshtml['data']['data']['userId']
            if userid:
                self.userid = str(userid) + '-tuniu'
                return True
            else:
                return False
        except:
            return False

    def _get_profile(self):
        try:
            url = 'https://i.tuniu.com/usercenter/usercommonajax/japi'
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Content-Length: 76
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Host: i.tuniu.com
            Origin: https://i.tuniu.com
            Pragma: no-cache
            Referer: https://i.tuniu.com/userinfoconfirm
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            X-Requested-With: XMLHttpRequest"""
            postdata = 'serviceName=MOB.MEMBERS.InnerController.getUserInfo&serviceParamsJson=%7B%7D'
            html = self.ha.getstring(url, headers=headers, req_data=postdata)
            jshtml = json.loads(html)
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            userid = jshtml['data']['data']['userId']
            res.nickname = jshtml['data']['data']['nickName']
            res.phone = jshtml['data']['data']['tel']
            res.birthday = jshtml['data']['data']['birthday']
            res.email = jshtml['data']['data']['email']
            res.address = jshtml['data']['data']['additionalAddress']
            sex = jshtml['data']['data']['sex']
            if sex == 1:
                res.gender = EGender.Male
            elif sex == 0:
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            detail = jshtml['data']['data']
            res.append_details(detail)
            photourl = jshtml['data']['data']['largeAvatarUrl']
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self.ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            page = 0
            while True:
                page += 1
                url = 'https://i.tuniu.com/usercenter/usercommonajax/japi/getOrderList?serviceName=MOB.MEMBER.InnerOrderController.getOrderList&serviceParamsJson=%7B%22type%22%3A0%2C%22page%22%3A{}%2C%22status%22%3A0%2C%22size%22%3A5%7D&_={}'.format(
                    page,
                    int(
                        datetime.now(
                            pytz.timezone('Asia/Shanghai')).timestamp() * 1000))
                headers = """
                Accept: application/json, text/javascript, */*; q=0.01
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: i.tuniu.com
                Pragma: no-cache
                Referer: https://i.tuniu.com/list/
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
                X-Requested-With: XMLHttpRequest"""
                html = self.ha.getstring(url, headers=headers)
                jshtml = json.loads(html)
                orderList = jshtml['data']['data']['orderList']
                if orderList:
                    for order in orderList:
                        try:
                            orderid = order['orderId']
                            ordertime = order['orderTime']
                            res_one = ITRAVELORDER_ONE(self.task,
                                                       self._appcfg._apptype,
                                                       self.userid, orderid)
                            res_one.append_orders(order)
                            res_one.ordertime = ordertime
                            res_one.host = 'www.tuniu.com'
                            yield res_one
                        except:
                            pass
                totalpage = jshtml['data']['data']['totalPage']
                if totalpage <= page:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))
class SpiderTaoBao(SpiderShoppingBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderTaoBao, self).__init__(task, appcfg, clientid)
        self.time = datetime.datetime.now(
            pytz.timezone('Asia/Shanghai')).strftime('%Y-%m-%d %H:%M:%S')
        self._ha = HttpAccess()
        self.userid = ''
        self.cookie = self.task.cookie

    def _cookie_login(self):
        self._ha._managedCookie.add_cookies("taobao.com", self.cookie)
        try:
            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            account = soup1.find_all(
                "span", {"class": "default grid-msg "})[0].get_text()
            if account:
                self.userid = account + '-taobao'
                return True
            else:
                return False
        except:
            return False

    # def _needcode(self):
    #     # Check whether a captcha is required; usually it is not. If it is,
    #     # stop here -- captcha handling is still unsolved.
    #     r = self._ha.getstring('https://login.taobao.com/member/request_nick_check.do?_input_charset=utf-8')
    #     pat = re.compile(r'"needcode":false')
    #     isneed = pat.findall(r)
    #     if isneed:
    #         res = True
    #     else:
    #         res = False
    #     return res
    #
    # def pwd_login(self):
    #     if self._needcode():
    #         # Log in manually once to obtain the account and the encrypted
    #         # 256-bit password. um_token and ua can be reused afterwards.
    #         try:
    #             # Get um
    #             url = 'https://ynuf.alipay.com/service/um.json'
    #             data = '''data=ENCODE~~V01~~eyJ4diI6IjMuMy43IiwieHQiOiJDMTUzOTc1ODg3MzI3OTkwODY1NDAzMjI5MTUzOTc1ODg3MzI3OTI5NSIsImV0ZiI6InUiLCJ4YSI6InRhb2Jhb19sb2dpbiIsInNpdGVJZCI6IiIsInVpZCI6IiIsImVtbCI6IkFBIiwiZXRpZCI6IiIsImVzaWQiOiIiLCJ0eXBlIjoicGMiLCJuY2UiOnRydWUsInBsYXQiOiJXaW4zMiIsIm5hY24iOiJNb3ppbGxhIiwibmFuIjoiTmV0c2NhcGUiLCJubGciOiJ6aC1DTiIsInN3IjoxNDQwLCJzaCI6OTAwLCJzYXciOjE0NDAsInNhaCI6ODYwLCJic3ciOjE0MTUsImJzaCI6OTE5LCJlbG9jIjoiaHR0cHMlM0ElMkYlMkZsb2dpbi50YW9iYW8uY29tJTJGbWVtYmVyJTJGbG9naW4uamh0bWwiLCJldHoiOjQ4MCwiZXR0IjoxNTM5NzU4ODczNDYxLCJlY24iOiJiNmUzNGRlZDBhMGQxMWFkOWJhM2Q5MjI0MmIyZWExZThhMmU5MTYxIiwiZWNhIjoiRk1sTkZHUkJ2alVDQVdYTWU5ZGN6QU5CIiwiZXJkIjoiZGVmYXVsdCxjb21tdW5pY2F0aW9ucyxhOTY4MWU4MTYwMzk5ZGVmMjkwN2IzM2JlMDFjZDU1ZDVmY2Q0NTUyYWE0MmNjZGYxZDc0MzljNmNlM2VkNDVkIiwiY2FjaGVpZCI6ImE2MTU1OGRkMDk0ZGJjNDciLCJ4aCI6IiIsImlwcyI6IjE5Mi4xNjguNDAuMjciLCJlcGwiOjMsImVwIjoiMmZiZjRhMGQzNDIxNGQ0ZmRlNmNjOGEyMjg5N2QxMTVhNzY2NzgxMSIsImVwbHMiOiJDMzcwYzMwN2Y0YWNhNzg1ODQ5M2RmZTMyMjI1NGU1Y2I0MzhiZTk0NCxOMGZjZDZlMThmZjZkZjc0Zjk4YTY5OGI3ZjZiNmQ4MzhhNmMxMWU2OSIsImVzbCI6ZmFsc2V9'''
    #             r0 = self._ha.getstring(url, req_data=data)
    #             patum = re.compile(r'{"tn":"(.*?)"')
    #             um = patum.findall(r0)[0]
    #
    #             url = 'https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976'
    #             data = """TPL_username={account}&TPL_password=&ncoSig=&ncoSessionid=&ncoToken=8374672d18e483bd0f6f39b0638cf4f717e652a3&slideCodeShow=false&useMobile=false&lang=zh_CN&loginsite=0&newlogin=0&TPL_redirect_url=https%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.1997525045.1.513d29b4SpO5fP&from=tbTop&fc=default&style=&css_style=&keyLogin=false&qrLogin=true&newMini=false&newMini2=false&tid=&loginType=3&minititle=&minipara=&pstrong=&sign=&need_sign=&isIgnore=&full_redirect=&sub_jump=&popid=&callback=&guf=&not_duplite_str=&need_user_id=&poy=&gvfdcname=10&gvfdcre=68747470733A2F2F756C616E642E74616F62616F2E636F6D2F73656D2F74627365617263683F7265667069643D6D6D5F32363633323235385F333530343132325F3332353338373632266B6579776F72643D26636C6B313D37333763303966343036323835646335356337353734373936303632366633362675707369643D3733376330396634303632383564633535633735373437393630363236663336&from_encoding=&sub=true&TPL_password_2={password}&loginASR=1&loginASRSuc=1&allp=&oslanguage=zh-CN&sr=1440*900&osVer=&naviVer=chrome%7C70.035284&osACN=Mozilla&osAV=5.0+%28Windows+NT+10.0%3B+Win64%3B+x64%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F70.0.3528.4+Safari%2F537.36&osPF=Win32&miserHardInfo=&appkey=00000000&nickLoginLink=&mobileLoginLink=https%3A%2F%2Flogin.taobao.com%2Fmember%2Flogin.jhtml%3Ffrom%3Dtaobaoindex%26f%3Dtop%26style%3D%26sub%3Dtrue%26redirect_url%3Dhttps%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.1997525045.1.513d29b4SpO5fP%26useMobile%3Dtrue&showAssistantLink=&um_token={um}&ua=112%23y7ZAac4WDEN%2B4mHzkW7CXzLl83YWtnWIfYqlxuD9pkxezGSBy82xHdU1%2Bz9HuXkMfR26HWpFZNWOI4DAmypKUNAFwuPH73TKN9emJlAiVVAQ1PxFlwKtsUH2e1JUVzTAlhrXT3HKWgp97V6117sF07h%2B80F1DbfQOC9S%2BcI0LOuAZ9EhTbhtawmxPLUgc7ub3BPq6XoCuCKcHPW2ab%2Fm07lh4UXqZAHrs4SsVffYA8usbPYVhkr9qNcciJ7oQ9tKSydH16TUwGpoLupH6oZ73pNyazmIkh6ngIefwMzYow%2Fw1%2FrQuUvVNxvF2%2BBd0ZxNkGGL8smu0EuIN4tVkqotEe6vLvwlyfhtDBLx6w3r%2Bn1GcfNVfVcDcCfNryZRhVyJceksSl%2Bz3yuxMuvikeUYKCqr9nN%2B5R5VHeVf83cA7e6XP5ApLtdMNhMdPdk8crONeCpmo1F9F6695Sajqz0KXIfDZbh5vnjvfIU4bvMPZt48%2BSN8boo9M%2BPfJX1%2B%2Bpy4edvCrktGxHvLBLyg1d3pH1t7qFSxN6VmvpggiSOD1EOchJl1ayWIOIi9i4OGEPZY12XkWZM%2F0U0ZCTPuV8oeoD1FAeyNaDWtDO7pBs0ZZtW7lKC5wQx9vfV68F%2B0cgH24SToFWlHCsBX5WF9l0SsozHR%2F7xqN2xOZ%2FwDc62bh9LzmIXop7l%2Bsi5lpcR5u2nOzroru4xSgyH5pDQ2AcIumWGEuTyF4V808dkWo0ng9QApgco8KtNUfLQzZpCGh%2BZWtnON1vPOb3SexR5fLREAb1mM%2Fc7Uc9FDpP%2F17MMZpnL5krOZA4l%2B7fysXJ4KiqEDhPnz7cMZrqCw%2F%2FMxiR3sKfm%2F%2Fc7%2BTAv4Yy3g5WX5QcXcYm5qvRKPMl1VpYetEoIcx%2FK9v26IgW3PWh5u0%2F3mN87FlXNOw%2Bw09BTeD67nz4NITOPlhlYPH6XyGIfxnzwsJKzViMWPvgxfwlTCEUiGlafbi8Oy%2FDLyFEL3kj06%2FB%2B4WLdskNlinhO5TfulxWaZbCjcDR%2FSVMovkXl7B6rT4O1GeOq6qSN1gI%2Bi5fv0U4Qo9xz%2BcQg9A95Go0XwHaFUo7f5QkeeP%2F3hdd%2Bu5aj5IhBnl1D7lZzlUt1QQC2Fz%2B8uHk3X%2Beduq37gWATJBAU07MhWd%2Fq0Ou7lz9KjiVbarWJb0vQqyM40SJlsF6OWjaQ%2FVyRNMwi5afotZrV99yuInrrSVoeZ%2BawQxsj7eLZ%2F1fySG%2BhBgmGcsUeMEIbgw8PsUGnu9vopAfDD2S8Zsxo5FiFV318D3%2BefR8EXItfuDcy0VpymDa9PKOiZxL3CN%2F4Ih66elaec61KGI9kzeFJQvHxJo3%2BE31wxDjqvGGfWzSKpiP8LJI9L2OrH%2FnCpPtb1O0nFRxXO0y0cf%2BKAXLybh88M9sRc%2BFoSjzP0UpDzAMGWwnPmCNuGNeBGmA47O%2BjSpQvPQZoHyARO1ck2pZgFN%2FUIK9dbB64vBoOambxtKL%2BSPX7b%2B4Uqkgn%2BS71UarxKI9Z9%2F%2Flz0szGzOK2O9BR1JRo0vqMI9pOOwC8U%2FVmfZ5SqC1VgduP983JSiEhof7LYnnlBhcIcyuuqqJ%2F4iUVzTXb%2BwDFs%2F2e%2BPjDf%2FQM%2Bq%2F6cclZYrZjuxqjI1e0FKcaPRuFBVcL%2BVx4Pf4GQ4u2JXluJv2jdihrAevHxu3Zx1HIFPuqOCIYBoDF4zqcKgBWiymhrN%2FsRpe%2B""".format(account=self.account, password=self.password, um=um)
    #             headers = """
    #             accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    #             accept-encoding: gzip, deflate
    #             accept-language: zh-CN,zh;q=0.9
    #             Origin: https://login.taobao.com
    #             Upgrade-Insecure-Requests: 1
    #             Content-Type: application/x-www-form-urlencoded
    #             Connection: keep-alive
    #             Host: login.taobao.com
    #             Referer: https://login.taobao.com/member/login.jhtml?spm=a2e15.8261149.754894437.1.118f29b4TQ4p9O&f=top&redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             """
    #             r = self._ha.getstring(url, req_data=data, headers=headers, encoding='gb2312')
    #             pattoken = re.compile(r'token=(.*?)&')
    #             token = pattoken.findall(r)[0]
    #
    #             # Get st via the token
    #             url1 = 'https://passport.alibaba.com/mini_apply_st.js?site=0&token={token}&callback=callback'.format(token=token)
    #             headers1 = """
    #             Accept: */*
    #             Accept-Encoding: gzip, deflate, br
    #             Accept-Language: zh-CN,zh;q=0.9
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             Referer: https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Fkeyword%3D%25E7%25BD%2591%25E6%25B7%2598%25E5%25AE%259D%26refpid%3Dmm_26632258_3504122_32538762%26clk1%3D96c17d51a8de3455444def907818d976%26upsid%3D96c17d51a8de3455444def907818d976
    #             Host: passport.alibaba.com
    #             Connection: keep-alive
    #             """
    #             r1 = self._ha.getstring(url1, headers=headers1)
    #             patst = re.compile(r'st":"(.*?)"')
    #             st = patst.findall(r1)[0]
    #
    #             # Simulate login via st
    #             url2 = 'https://login.taobao.com/member/vst.htm?st={st}&TPL_username={account}'.format(st=st, account=self.account)
    #             headers2 = """
    #             accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
    #             accept-encoding: gzip, deflate, br
    #             accept-language: zh-CN,zh;q=0.9
    #             User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
    #             Upgrade-Insecure-Requests: 1
    #             Connection: keep-alive
    #             Host: login.taobao.com
    #             """
    #             r2 = self._ha.getstring(url2, headers=headers2)
    #         except Exception as ex:
    #             self._logger.error("Download error: %s" % ex)
    #             return False
    #         newcookie = self._ha._managedCookie.get_cookie_for_domain('https://www.taobao.com')
    #         # print(newcookie)
    #         self.cookie = newcookie
    #         self.task.cookie = newcookie
    #         return True

    def _get_profile(self):
        # Personal info
        try:
            url = 'https://i.taobao.com/user/baseInfoSet.htm'
            headers = """
            accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            accept-encoding: gzip, deflate, br
            accept-language: zh-CN,zh;q=0.9
            cookie: {cookie}
            upgrade-insecure-requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
            """.format(cookie=self.cookie)
            r = self._ha.getstring(url, headers=headers)
            # print(r)
            soup = BeautifulSoup(r, 'html.parser')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            detail = {}
            photourl = soup.select_one('.pf-avatar img.png')['src']
            try:
                res.nickname = soup.select_one('#J_uniqueName')['value']
            except:
                pass
            try:
                detail['fullname'] = soup.select_one('#J_realname')['value']
            except:
                pass
            try:
                gender = soup.select_one('.except [checked="checked"]')['value']
                if gender == '0':
                    res.gender = EGender.Male
                elif gender == '1':
                    res.gender = EGender.Female
                else:
                    res.gender = EGender.Unknown
            except:
                pass
            try:
                year = soup.select_one('#J_Year [selected="selected"]')['value']
                month = soup.select_one('#J_Month [selected="selected"]')['value']
                data = soup.select_one('#J_Date [selected="selected"]')['value']
                res.birthday = year + '-' + month + '-' + data
            except:
                pass
            response = self._ha.getstring(
                'https://member1.taobao.com/member/fresh/account_security.htm')
            soup1 = BeautifulSoup(response, 'html.parser')
            try:
                res.account = soup1.find_all(
                    "span", {"class": "default grid-msg "})[0].get_text()
            except:
                pass
            try:
                res.email = soup1.find_all(
                    "span", {"class": "default grid-msg "})[1].get_text()
            except:
                pass
            try:
                res.phone = soup1.find("span", {
                    "class": "default grid-msg"
                }).get_text().strip()
            except:
                pass
            # Look the address up in the district table
            try:
                liveDivisionCode = soup.select_one(
                    'input#liveDivisionCode')['value']
                if liveDivisionCode:
                    detail['hometown'] = self._get_address(liveDivisionCode)
            except:
                pass
            try:
                divisionCode = soup.select_one('input#divisionCode')['value']
                if divisionCode:
                    res.address = self._get_address(divisionCode)
                res.detail = json.dumps(detail)
            except:
                pass
            if photourl:
                photourl = 'https:' + photourl
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self._ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            self._ha._managedCookie.add_cookies("taobao.com", self.cookie)
            url = 'https://buyertrade.taobao.com/trade/itemlist/asyncBought.htm?action=itemlist/BoughtQueryAction&event_submit_do_query=1&_input_charset=utf8'
            i = 0
            while True:
                i += 1
                formdata = """dateBegin=0&dateEnd=0&options=0&pageNum={i}&pageSize=15&queryOrder=desc&prePageNo={j}""".format(
                    i=i, j=i - 1)
                headers = """
                accept: application/json, text/javascript, */*; q=0.01
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                content-type: application/x-www-form-urlencoded; charset=UTF-8
                origin: https://buyertrade.taobao.com
                pragma: no-cache
                referer: https://buyertrade.taobao.com/trade/itemlist/list_bought_items.htm?spm=a1z02.1.a2109.d1000368.1c2d782dHeADbf&nekot=1470211439694
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
                x-requested-with: XMLHttpRequest
                """
                # cookie: {cookie}
                response = self._ha.getstring(url,
                                              headers=headers,
                                              req_data=formdata)
                html = json.loads(response)
                mainorders = html.get('mainOrders')
                if mainorders:
                    # print("Fetching page {0:d}.".format(i))
                    for order in mainorders:
                        try:
                            dic = {}
                            orderid = order.get('id')
                            dic['id'] = order.get('id')
                            dic['shopname'] = order.get('seller').get('shopName')
                            ordertime = order.get('orderInfo').get('createTime')
                            dic['actualFee'] = order.get('payInfo').get('actualFee')
                            dic['status'] = order.get('statusInfo').get('text')
                            goods = []
                            for item in order['subOrders']:
                                di = {}
                                try:
                                    di['title'] = item.get('itemInfo').get('title')
                                except:
                                    pass
                                try:
                                    di['quantity'] = item['quantity']
                                except:
                                    pass
                                try:
                                    di['skuText'] = item['itemInfo']['skuText']
                                except:
                                    pass
                                try:
                                    di['priceInfo'] = item['priceInfo']
                                except:
                                    pass
                                goods.append(di)
                            dic['goods'] = goods
                            res_one = ISHOPPING_ONE(self.task,
                                                    self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic)
                            res_one.host = 'www.taobao.com'
                            yield res_one
                        except:
                            pass
                    time.sleep(1)
                else:
                    break
        except Exception:
            self._logger.error('{} got order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_address(self, adressid):
        table = self._ha.getstring(
            'https://www.taobao.com/home/js/sys/districtselector.js?t=20140318.js')
        patdz = re.compile(
            r'TB.form.DistrictSelector._tb_ds_data=(.*?);TB.form.Di')
        dzdata = patdz.findall(table)[0]
        jsdata = json.loads(dzdata)
        res = []
        if adressid is not None and adressid != '1':
            dz1 = jsdata[adressid]
            res = dz1[0]
            if dz1[1] != '1':
                dz2 = jsdata[dz1[1]]
                res = dz2[0] + res
                if dz2[1] != '1':
                    dz3 = jsdata[dz2[1]]
                    res = dz3[0] + res
        return res

    def _logout(self):
        res = False
        try:
            url = 'https://login.taobao.com/member/logout.jhtml?spm=a1z02.1.754894437.7.7016782dPtkeCQ&f=top&out=true&redirectURL=https%3A%2F%2Fi.taobao.com%2Fmy_taobao.htm%3Fspm%3Da2e15.8261149.754894437.3.555929b48sljpe%26ad_id%3D%26am_id%3D%26cm_id%3D%26pm_id%3D1501036000a02c5c3739%26nekot%3DdGI4NTgzMzYzXzAw1553481160507'
            html = self._ha.getstring(url,
                                      headers="""
            Host: login.taobao.com
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: https://i.taobao.com/my_taobao.htm?spm=a2e15.8261149.754894437.3.555929b48sljpe&ad_id=&am_id=&cm_id=&pm_id=1501036000a02c5c3739&nekot=dGI4NTgzMzYzXzAw1553481160507
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """)
            res = self._cookie_login()
            if not res:
                res = True
        except Exception:
            self._logger.error('login out fail:{}'.format(
                traceback.format_exc()))
        return res
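_get_address above resolves a Taobao division code by walking the table from districtselector.js upward: each entry maps a code to [name, parent_code], and parent names are prepended until the root id '1' is reached. A self-contained sketch of that walk over made-up sample data:

# Illustrative subset only; the real table is parsed out of
# https://www.taobao.com/home/js/sys/districtselector.js
jsdata = {
    '110000': ['Beijing', '1'],
    '110100': ['Beijing City', '110000'],
    '110101': ['Dongcheng District', '110100'],
}


def resolve_address(table: dict, code: str) -> str:
    """Prepend each parent's name until the root id '1', as _get_address does."""
    res = ''
    while code and code != '1':
        name, parent = table[code]
        res = name + res
        code = parent
    return res


print(resolve_address(jsdata, '110101'))
# -> 'BeijingBeijing CityDongcheng District'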
class SpiderMafengwo(SpiderTravelBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderMafengwo, self).__init__(task, appcfg, clientid)
        self.ha = HttpAccess()
        if self.task.cookie:
            self.ha._managedCookie.add_cookies('mafengwo.cn', self.task.cookie)

    def _check_registration(self):
        """
        Check whether the phone number is registered with Mafengwo.
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        try:
            phone = self.task.phone
            url = 'https://passport.mafengwo.cn/regist.html'
            headers = """
            Host: passport.mafengwo.cn
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: http://www.mafengwo.cn/?mfw_chid=3546
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """
            html = self.ha.getstring(url, headers=headers)
            token = substring(html, 'name="token" value="', '"')
            url = 'https://passport.mafengwo.cn/regist'
            headers = """
            Host: passport.mafengwo.cn
            Connection: keep-alive
            Content-Length: 59
            Cache-Control: max-age=0
            Origin: https://passport.mafengwo.cn
            Upgrade-Insecure-Requests: 1
            Content-Type: application/x-www-form-urlencoded
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Referer: https://passport.mafengwo.cn/regist.html
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            """
            data = f"token={token}&passport={phone}"
            html = self.ha.getstring(url, headers=headers, req_data=data)
            isreg = re.findall(r'<div class="alert alert-danger">', html)
            if isreg:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)
        except Exception:
            self._logger.error('Check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return

    def _cookie_login(self):
        try:
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            userid = substring(html, '"UID":', ',')
            if userid:
                self.userid = userid + '-mafengwo'
                return True
            else:
                return False
        except Exception:
            self._logger.error('Mafengwo cookie login error: {}'.format(
                traceback.format_exc()))
            return False

    def _get_profile(self):
        try:
            url = 'https://passport.mafengwo.cn/setting/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://www.mafengwo.cn
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers)
            soup = BeautifulSoup(html, 'lxml')
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = soup.select_one('[name="name"]')['value']
            # Gender flag: 1 = male, 0 = female, 2 = private
            sex = soup.select_one('[checked="true"]')['value']
            if sex == '1':
                res.gender = EGender.Male
            elif sex == '0':
                res.gender = EGender.Female
            else:
                res.gender = EGender.Unknown
            res.city = soup.select_one('[name="city"]')['value']
            res.birthday = soup.select_one('[name="birthday"]')['value']
            detail = {}
            detail['introduce'] = soup.select_one('[name="intro"]').get_text()
            if detail['introduce']:
                res.append_details(detail)
            url = 'https://passport.mafengwo.cn/setting/security/'
            headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: passport.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\n', '')
            html = re.sub(r'\s{2,}', '', html)
            soup = BeautifulSoup(html, 'lxml')
            userid = substring(html, '"UID":', ',')
            email = soup.select('.userpass dd')[1].get_text('-')
            res.email = email.split('-')[0]
            phone = soup.select('.userpass dd')[2].get_text('-')
            res.phone = phone.split('-')[0]
            url = 'https://pagelet.mafengwo.cn/user/apps/pagelet/pageViewHeadInfo?callback=jQuery181042165802873390845_{}&params=%7B%22type%22%3A1%7D&_={}'.format(
                int(
                    datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() *
                    1000),
                int(
                    datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() *
                    1000))
            headers = """
            Accept: */*
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: pagelet.mafengwo.cn
            Pragma: no-cache
            Referer: https://passport.mafengwo.cn/setting/
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
            html = self.ha.getstring(url, headers=headers).replace('\\', '')
            photourl = substring(html, '<img.png src="', '"')
            if photourl:
                profilepic: RESOURCES = RESOURCES(self._clientid, self.task,
                                                  photourl,
                                                  EResourceType.Picture,
                                                  self._appcfg._apptype)
                resp_stream: ResponseIO = self._ha.get_response_stream(photourl)
                profilepic.io_stream = resp_stream
                profilepic.filename = photourl.rsplit('/', 1)[-1]
                profilepic.sign = ESign.PicUrl
                res.append_resource(profilepic)
                yield profilepic
            yield res
        except Exception:
            self._logger.error('{} got profile fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _get_orders(self):
        try:
            start = -10
            while True:
                start += 10
                url = f'https://www.mafengwo.cn/order_center/?status=0&start={start}'
                headers = """
                accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                pragma: no-cache
                referer: https://www.mafengwo.cn/order_center/?status=0&start=0
                upgrade-insecure-requests: 1
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
                html = self.ha.getstring(url, headers=headers).replace(
                    '\r', '').replace('\n', '').replace('\t', '')
                html = re.sub(r'\s{2,}', ' ', html)
                soup = BeautifulSoup(html, 'lxml')
                tables = soup.select('.order-item')
                for table in tables:
                    try:
                        order = {}
                        orderid = table.attrs.get('data-id')
                        order['orderid'] = orderid
                        ordertime = table.select_one('.time').get_text()
                        try:
                            order['supplier'] = table.select_one(
                                '.supplier').get_text()
                        except:
                            pass
                        try:
                            order['telphone'] = table.select_one(
                                '.telphone').get_text()
                        except:
                            pass
                        order['pro-detail'] = table.select_one(
                            '.pro-detail').get_text(' ')
                        order['td-date'] = table.select_one(
                            '.td-date').get_text()
                        order['price'] = table.select_one(
                            '.td-cost').get_text()
                        order['status'] = table.select_one(
                            '.td-status').get_text()
                        try:
                            orderurl = table.select_one('caption a')['href']
                            if orderurl:
                                detail = self._order_detail(orderurl)
                                order['detail'] = detail
                        except:
                            pass
                        res_one = ITRAVELORDER_ONE(self.task,
                                                   self._appcfg._apptype,
                                                   self.userid, orderid)
                        res_one.append_orders(order)
                        res_one.ordertime = ordertime.split(':', 1)[1]
                        res_one.host = "www.mafengwo.cn"
                        yield res_one
                    except Exception:
                        self._logger.error(
                            'Mafengwo one order get fail: {}'.format(
                                traceback.format_exc()))
                if not tables:
                    break
        except Exception:
            self._logger.error('{} get order fail: {}'.format(
                self.userid, traceback.format_exc()))

    def _order_detail(self, orderurl):
        orderurl = 'https://www.mafengwo.cn' + orderurl
        headers = """
        Host: www.mafengwo.cn
        Connection: keep-alive
        Pragma: no-cache
        Cache-Control: no-cache
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        """
        html = self.ha.getstring(orderurl, headers=headers).replace(
            '\r', '').replace('\n', '').replace('\t', '')
        # html = ha.get_response(orderurl, headers=headers)
        html = re.sub(r'\s{2,}', ' ', html)
        soup = BeautifulSoup(html, 'lxml')
        order = soup.select_one('.order-detail').get_text(' ')
        return order

    def logout(self):
        res = False
        try:
            url = 'https://passport.mafengwo.cn/logout.html'
            html = self._ha.getstring(url,
                                      headers="""
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Host: www.mafengwo.cn
            Pragma: no-cache
            Proxy-Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36""")
            if not self._cookie_login():
                res = True
        except Exception:
            self._logger.error('log out fail: {}'.format(
                traceback.format_exc()))
        return res
class Seebug(AutoPluginBase):

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        self._get_cookie()

    def _get_cookie(self):
        try:
            ic = False
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            driver.get('https://www.seebug.org/')
            time.sleep(2)
            cookies = driver.get_cookies()
            l_cookie = ''
            for cookie in cookies:
                l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                if cookie['name'] == '__jsl_clearance':
                    ic = True
            self.cookie = l_cookie
            # print(self.cookie)
            self.ha._managedCookie.add_cookies('.seebug.org', self.cookie)
            if ic:
                self._logger.info('Got cookie success!')
            driver.close()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(
                traceback.format_exc()))

    def get_bug(self):
        page = 0
        max_page = None
        while True:
            page += 1
            fail_time = 0
            while True:
                url = f'https://www.seebug.org/vuldb/vulnerabilities?page={page}'
                html = self.ha.getstring(url,
                                         headers="""
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: www.seebug.org
                Pragma: no-cache
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                # "您访问频率太高,请稍候再试。" = rate-limit notice ("you are
                # visiting too frequently, please retry later")
                if '您访问频率太高,请稍候再试。' in html:
                    print(f'您访问频率太高,请稍候再试。{fail_time} * 5s')
                    fail_time += 1
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    time.sleep(5)
                else:
                    break
            soup = BeautifulSoup(html, 'lxml')
            tbody = soup.select('tbody tr')
            if not max_page:
                max_page = soup.select_one('#J-jump-form input').attrs['max']
            for tr in tbody:
                self.bug_detail(tr, url)
            if int(max_page) <= page:
                break

    def bug_detail(self, tr, referer):
        try:
            b_url = 'https://www.seebug.org' + tr.select_one('td a').attrs['href']
            name = tr.select_one('.vul-title-wrapper a').get_text()
            datasource = 'seebug'
            id = tr.select_one('td a').get_text()
            date_published = tr.select_one(
                '.text-center.datetime.hidden-sm.hidden-xs').get_text()
            tooltip = tr.select_one(
                '[data-toggle="tooltip"]').attrs['data-original-title']
            # Severity tooltip: 高危 = high, 中危 = medium
            if tooltip == '高危':
                level = 3
            elif tooltip == '中危':
                level = 2
            else:
                level = 1
            fail_time = 0
            res = ExpDB(name, datasource, id, date_published, 0)
            while True:
                b_html = self.ha.getstring(b_url,
                                           headers=f"""
                Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                Accept-Encoding: gzip, deflate, br
                Accept-Language: zh-CN,zh;q=0.9
                Cache-Control: no-cache
                Connection: keep-alive
                Host: www.seebug.org
                Pragma: no-cache
                Referer: {referer}
                Upgrade-Insecure-Requests: 1
                User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36""")
                if '您访问频率太高,请稍候再试。' in b_html:
                    fail_time += 1
                    print(f'您访问频率太高,请稍候再试。{fail_time} * 45s')
                    if fail_time == 5:
                        print('Fail time outnumber 5!')
                        break
                    t = random.randint(30, 60)
                    time.sleep(t)
                else:
                    break
            b_soup = BeautifulSoup(b_html, 'lxml')
            try:
                file_data = b_soup.select_one('#j-md-detail').get_text()
                # "登录后查看" = "log in to view"; skip detail bodies hidden
                # behind the login wall
                if '登录后查看' not in file_data:
                    description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                    self.write_text_string(description, file_data,
                                           'iscan_expdb_doc')
            except:
                pass
            tags = []
            try:
                tag_type = b_soup.select('.bug-msg .col-md-4')[1].select_one(
                    'dd').get_text()
                tags.append(tag_type)
                res.tags = tags
            except:
                pass
            target = []
            try:
                tar = {}
                ta_type = b_soup.select_one(
                    '.hover-scroll a').get_text().replace('\n', '')
                ta_type = re.sub(r'\s{2,}', '', ta_type)
                tar['type'] = ta_type
                try:
                    version = b_soup.select_one('.hover-scroll').get_text()
                    version = substring(version, '(', ')')
                    tar['version'] = {'list': version}
                except:
                    pass
                target.append(tar)
                res.target = target
            except:
                pass
            cve_id = b_soup.select('.bug-msg .col-md-4')[2].select_one(
                'dd').get_text()
            code = []
            # "补充" marks a placeholder CVE entry awaiting completion
            if '补充' not in cve_id:
                cve = {}
                cve['code_type'] = 'cve'
                cve['code'] = cve_id.replace('\n', '')
                code.append(cve)
            res.code = code
            author = {}
            author['name'] = b_soup.select('.bug-msg .col-md-4')[1].select(
                'dd')[3].get_text()
            author['name'] = re.sub(r'\s{2,}', '', author['name'])
            res.author = author
            try:
                poc = b_soup.select_one('#J-poc').get_text()
                description = f'datasource: seebug\nid: {id}\nname: {name}\nurl:{b_url}\n'
                self.write_text_string(description, poc, 'iscan_expdb_exp')
            except:
                pass
            print(name, datasource, id, date_published, tooltip, level, tags,
                  code, author)
            time.sleep(3)
        except Exception:
            self._logger.error('Got bug detail fail: {}'.format(
                traceback.format_exc()))

    def start(self):
        self.get_bug()
# Assumed imports for this plugin (stdlib plus pytz); AutoPluginBase, EAutoType,
# HttpAccess and substring come from the project's own packages, whose import
# paths are not shown in this excerpt.
import json
import queue
import threading
import time
import traceback
from datetime import datetime

import pytz


class Exploit(AutoPluginBase):
    tasktype = EAutoType.EXPDB

    # exploit-db serves its index as a DataTables JSON endpoint; these headers
    # mimic the browser's XHR request.
    _JSON_HEADERS = """
    accept: application/json, text/javascript, */*; q=0.01
    accept-encoding: gzip, deflate, br
    accept-language: zh-CN,zh;q=0.9
    cache-control: no-cache
    pragma: no-cache
    referer: https://www.exploit-db.com/
    user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
    x-requested-with: XMLHttpRequest"""

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess(0.5)
        # Session cookie captured from a browser; it expires and needs refreshing.
        self.ha._managedCookie.add_cookies(
            '.exploit-db.com',
            '_ga=GA1.3.659193532.1562029495; _gid=GA1.3.817890876.1562029495; _gat=1; XSRF-TOKEN=eyJpdiI6IkRQd3M1RHljcnhEM2hTVzhiMlcyV2c9PSIsInZhbHVlIjoiNzAwQVdWWFR3ck9oenBHWUd2a2NENTdrNXRqeWtOMU9iWk9pRGRxdVRFcGJyMmc0Q3gwQzBjbVg2bEdDWU5HTSIsIm1hYyI6IjFlMjRkMTcxMmFhODE1NzRmMDc0YWJlZTUzZTRlYTlmZjMyYTU1NDZjMjE5NjdkOTkzMGFjNDZlMzBhMWVjMjIifQ%3D%3D; exploit_database_session=eyJpdiI6IkE0QmJ3alZ3S0RFd3FHcnhQOTQzVGc9PSIsInZhbHVlIjoiYkpDNnh5azdtcmlOYms5cW5sMW9DUnhXYmhmSk9iZlVQa3pxOUVwUEpxQXhUeThMZUF3ZVBpMGRURzNcL1grNHAiLCJtYWMiOiIyODAzMmJjNjUzMWYyYzA0NmY0ZTYwMzFhYjg1YWUyOTc0OTMzNzBhYmYyZDc2MTkwZDYzYWY5Y2M5ZDhhMDI1In0%3D'
        )
        self.page_queue = queue.Queue()
        self.detailsuffix = 'iscan_expdb'  # feedback record that carries a file body
        self.scriptsuffix = 'iscan_expdb_exp'
        self.sourcecodesuffix = 'iscan_expdb_app'

    def _build_query_url(self, draw, start):
        """Build the DataTables query URL for one page (120 records per page)."""
        t = int(datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() * 1000)
        return f'https://www.exploit-db.com/?draw={draw}&columns%5B0%5D%5Bdata%5D=date_published&columns%5B0%5D%5Bname%5D=date_published&columns%5B0%5D%5Bsearchable%5D=true&columns%5B0%5D%5Borderable%5D=true&columns%5B0%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B0%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B1%5D%5Bdata%5D=download&columns%5B1%5D%5Bname%5D=download&columns%5B1%5D%5Bsearchable%5D=false&columns%5B1%5D%5Borderable%5D=false&columns%5B1%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B1%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B2%5D%5Bdata%5D=application_md5&columns%5B2%5D%5Bname%5D=application_md5&columns%5B2%5D%5Bsearchable%5D=true&columns%5B2%5D%5Borderable%5D=false&columns%5B2%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B2%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B3%5D%5Bdata%5D=verified&columns%5B3%5D%5Bname%5D=verified&columns%5B3%5D%5Bsearchable%5D=true&columns%5B3%5D%5Borderable%5D=false&columns%5B3%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B3%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B4%5D%5Bdata%5D=description&columns%5B4%5D%5Bname%5D=description&columns%5B4%5D%5Bsearchable%5D=true&columns%5B4%5D%5Borderable%5D=false&columns%5B4%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B4%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B5%5D%5Bdata%5D=type_id&columns%5B5%5D%5Bname%5D=type_id&columns%5B5%5D%5Bsearchable%5D=true&columns%5B5%5D%5Borderable%5D=false&columns%5B5%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B5%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B6%5D%5Bdata%5D=platform_id&columns%5B6%5D%5Bname%5D=platform_id&columns%5B6%5D%5Bsearchable%5D=true&columns%5B6%5D%5Borderable%5D=false&columns%5B6%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B6%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B7%5D%5Bdata%5D=author_id&columns%5B7%5D%5Bname%5D=author_id&columns%5B7%5D%5Bsearchable%5D=false&columns%5B7%5D%5Borderable%5D=false&columns%5B7%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B7%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B8%5D%5Bdata%5D=code&columns%5B8%5D%5Bname%5D=code.code&columns%5B8%5D%5Bsearchable%5D=true&columns%5B8%5D%5Borderable%5D=true&columns%5B8%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B8%5D%5Bsearch%5D%5Bregex%5D=false&columns%5B9%5D%5Bdata%5D=id&columns%5B9%5D%5Bname%5D=id&columns%5B9%5D%5Bsearchable%5D=false&columns%5B9%5D%5Borderable%5D=true&columns%5B9%5D%5Bsearch%5D%5Bvalue%5D=&columns%5B9%5D%5Bsearch%5D%5Bregex%5D=false&order%5B0%5D%5Bcolumn%5D=9&order%5B0%5D%5Bdir%5D=desc&start={start}&length=120&search%5Bvalue%5D=&search%5Bregex%5D=false&author=&port=&type=&tag=&platform=&_={t}'

    def get_url(self):
        """Query page 1 to learn the total record count, then enqueue every page."""
        html = self.ha.getstring(self._build_query_url(1, 0), headers=self._JSON_HEADERS)
        jshtml = json.loads(html)
        total = jshtml['recordsTotal']
        max_page = int(total / 120) + 1
        for draw in range(1, max_page + 1):
            start = (draw - 1) * 120
            self.page_queue.put((draw, start))
        print('Got all pages! Start download!')

    def get_onepage(self, draw, start):
        html = self.ha.getstring(self._build_query_url(draw, start), headers=self._JSON_HEADERS)
        jshtml = json.loads(html)
        if not jshtml['data']:
            return
        for data in jshtml['data']:
            id = data['id']
            # Results are ordered by id descending, so the first already-stored
            # id means the rest of this page was crawled before.
            if self.is_expdbdata_unique(str(id) + 'exploit'):
                return
            name = data['description'][1]  # index 1 holds the human-readable title
            datasource = 'exploitdb'
            date_published = data['date_published']
            verified = data['verified']
            description = data['description']
            tags = []
            if data['tags']:
                tags.append(self.tag_mapping(data['tags'][0]['title']))
            # Pull a version number out of titles like "Foo 1.2.3 - RCE".
            version = {'list': []}
            ver = substring(name, ' ', ' - ')
            if ver and '.' in ver:
                version['list'].append(ver)
            target = [{
                'type': data['type']['name'],
                'platform': data['platform_id'],
                'version': version,
            }]
            author = {'name': data['author']['name']}
            code = []
            if data['code']:
                for dat in data['code']:
                    code.append({'code_type': dat['code_type'], 'code': dat['code']})
            app = []
            if data['application_md5'] and data['application_path']:
                ap = {}
                application_md5 = data['application_md5']
                ap['name'] = substring(application_md5, '<a href="/apps/', '"')
                ap['url'] = 'https://www.exploit-db.com/apps/' + ap['name']
                ap['path'] = self.download_app(ap['url'], id, ap['name'])
                app.append(ap)
            exp = {
                'name': str(id) + '.txt',
                'url': 'https://www.exploit-db.com/download/' + str(id),
            }
            exp['path'] = self.download_exploit(exp['url'], id, exp['name'])
            exploit = [exp]
            res = {
                'name': name,
                'datasource': datasource,
                'id': str(id),
                'date_published': date_published,
                'verified': verified,
                'description': description,
                'tags': tags,
                'target': target,
                'author': author,
                'code': code,
                'app': app,
                'exploit': exploit,
            }
            self.write_text(res, 'iscan_expdb')
            self.store_expdbdata_unique(str(id) + 'exploit')

    def _download(self, url, id, name, suffix, label):
        """Download one file with up to 3 retries; returns the output filename
        or None. download_exploit/download_app only differ in suffix and label."""
        failnum = 0
        while True:
            try:
                headers = f"""
                accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
                accept-encoding: gzip, deflate, br
                accept-language: zh-CN,zh;q=0.9
                cache-control: no-cache
                pragma: no-cache
                referer: https://www.exploit-db.com/exploits/{id}
                upgrade-insecure-requests: 1
                user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"""
                stream = self.ha.get_response_stream(url, headers=headers)
                description = f'datasource: exploitdb\nid: {id}\nname: {name}\nurl:{url}\n'
                data = stream.read()
                outname = self.write_text_binary(description, data, suffix)
                print(f"Output data, filename:{outname}")
                if failnum != 0:
                    self._logger.info(f'ID: {id} try download {label} again success!')
                return outname
            except Exception:
                failnum += 1
                self._logger.error(
                    f'ID: {id} download {label} error: {traceback.format_exc()}\ntry download again!')
                if failnum > 3:
                    self._logger.info(f'ID: {id} download {label} error over 3 times!')
                    return None
                time.sleep(3)

    def download_exploit(self, url, id, name):
        return self._download(url, id, name, 'iscan_expdb_exp', 'expdb_exp')

    def download_app(self, url, id, name):
        return self._download(url, id, name, 'iscan_expdb_app', 'expdb_app')

    def run(self):
        """Worker thread: drain the page queue; exit after 5 consecutive empty polls."""
        got = False
        none_count = 0
        draw = None
        while True:
            try:
                got = False
                draw, start = self.page_queue.get(timeout=3)
                got = True
                self.get_onepage(draw, start)
            except queue.Empty:
                none_count += 1
                if none_count >= 5:
                    break
                time.sleep(5)
            except Exception:
                print("Fetch error: page={} error={}".format(draw, traceback.format_exc()))
            finally:
                if got:
                    self.page_queue.task_done()

    def __del__(self):
        print("EXPdb mission accomplished")

    def start(self):
        self.get_url()
        ths = [
            threading.Thread(target=self.run, name=f'thread{i}')
            for i in range(7)
        ]
        for el in ths:
            el.start()
        for el in ths:
            el.join()
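# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original source): driving the
# Exploit plugin standalone. It assumes the framework only needs the object
# constructed and start() called, and that the hard-coded exploit-db session
# cookie in __init__ has been refreshed (the one above has long expired).
if __name__ == '__main__':
    plugin = Exploit()
    plugin.start()  # get_url() enqueues every page, 7 worker threads drain them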
class SpiderSuning(SpiderShoppingBase):

    def __init__(self, task, appcfg, clientid):
        super(SpiderSuning, self).__init__(task, appcfg, clientid)
        self._ha = HttpAccess()
        self.userid = ""
        self.time = datetime.datetime.now(
            pytz.timezone("Asia/Shanghai")).strftime("%Y-%m-%d %H:%M:%S")
        if self.task.cookie:
            self._ha._managedCookie.add_cookies("suning.com", self.task.cookie)

    def _cookie_login(self):
        """Check that the stored cookie still logs in; on success fill self.userid."""
        res = False
        url = "http://my.suning.com/person.do"
        headers = """
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,zh;q=0.9
        Host: my.suning.com
        Proxy-Connection: keep-alive
        Referer: http://my.suning.com/person.do
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36
        """
        try:
            r = self._ha.getstring(url, headers=headers, timeout=10)
            soup = BeautifulSoup(r, "lxml")
            # The page shows "您的会员编号为:<id>," ("your member number is ...").
            patuserid = re.compile(r"您的会员编号为:(.*?),", re.S)
            userid = patuserid.findall(str(soup))[0]
            if userid:
                self.userid = userid + "-suning"
                res = True
        except Exception:
            self._logger.error(
                f"Cookie login error, err:{traceback.format_exc()}")
        return res

    def _check_registration(self):
        """Check whether the phone number is registered with Suning: submit a
        login attempt and interpret a 'badPassword' error as 'account exists'."""
        t = time.strftime("%Y-%m-%d %H:%M:%S")
        try:
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Content-Type: application/x-www-form-urlencoded; charset=UTF-8
            Origin: http://passport.suning.com
            Referer: http://passport.suning.com/ids/login
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
            X-Requested-With: XMLHttpRequest
            """
            url = "http://passport.suning.com/ids/login"
            postdata = f"jsonViewType=true&username={self.task.phone}&password=&password2=Ujsa1wIs9Jnzn%2Fc%2BqT%2FyQldPMMWVrWviEorr1ku8VnnZGydpUB55QyQZso%2B1%2BZYP97u1MIlXMoBbCTkKRMURME7dMO%2BGIuA6RwVOmFCawDE%2FMYMtuO1PmhgwRlxurrcKF8uBep9Sf8D4dgTv7w%2F8rYqrI3cxUTWmpedBArbxQ6Y%3D&loginTheme=defaultTheme&service=&rememberMe=true&client=app&sceneId=logonImg&detect=mmds_3nZnnnuzF3MnnnuzT3Znnniz83Znnn3zP3MnnnnzL3nZnnnnzS3ZnnnnzN3Znnnvz13Mnnn0zq3Ynnn9zE3nZnnnYzR3Znnn-z43mnnnrzr3YnnnczU3u3nncz-3nvinnczU3MnnnczU3bCnnczr3Mnnnfz43Znnnhz433ZnnnEz43MnnnXzf3Znnn.zf3Znnn1zO3Ynnnkzh3nmnnnVzB3YnnnPzB3ZnnnJzB3MnnnbzB3MnnnbzB3nq3nnFzh3ZnnnFzO3YnnnKzR3mnnnpzf3Znnnlzf3nQnnnlzt3fnnnlz43n3nnpz43Kinnpzc3znnnKzc3nMnnnFzr3YnnnTz-3o3nnJz-3CnnnLz_3MnnnNzY3nZnnnCzY3Mnnn1GY3ZnnnuGY3ZnnnA7Y3Mnnn67Y3iYnnnO7Y3VnnnO7Y3FnnnO7Z3CnnnR7M3Ynnnt753nZnnn-7o3Mnnn_7I3-nnnY7I35nnn97I3Mnnno7I3nXpnno7I3MnnnY7Z3Znnnt7U3Znnn67t3Mnnnw7R3nZnnn17h3YnnnA7h393nns7h3znnnW7h3Mnnnw7h3nMnnn.7B3QnnnA7B3mnnnT763ZnnnoGX3Ynnn1Gs33Mnnnizk3MnnnZzd3Ynnntza3Mnnnhzj3LAnnuCni3Ynnn7C3imnnnGCiiPnnnzC7i9nnneCCiMnnn5Coi3Znnn5CMiYnnn5CZiynnn5CYiCnnn5C_i5nnn5CUinmnnneCfiZnnnCCBiMnnnzCEi_3nnzC6iT3nnzCgi36nnnzCgiMnnnzCXiYnnnzCqiJCnnGCqiMnnnuCdinMnnn~zQiYYnngzLiMnnn2zViMnnn2zViZnnn2zSinl3nnXzaiennnXzaiMnnnqzdiZnnnqzNiMnnnqzNinxYa2E~~j2tjE7R.fE2EjY.Pj7Y2PRPYf.EaYaz~zf6~Eju7.xa2jPxRttxDYYR~auzz~ajx8Pnn~C_36enniCuinXoncCviZ23nk0UiMgnn4C3iMjnnk0viQConMCnirMnn4zgiMG3nwztifINnyzMi853npz43353nhzj3cY3nqzNixa22txaEEjPERP2EPutxxn3iuv7GzC0xjxtxa22tx~utxaxtxGBI3.uZinnn1znn.u9innnTvnn.uz3nnnnnnn.usinnnC5nn.uc3nnn6inn.uW3nnnnvnn.uminnnT7nn.uJnnnnnnnn.usinnn67nn.uT3nnnx2jPxs7mnnnnnvnnniu3nnnnnSgnndnnnnnnnh2nni5nnnnnn4nnnEqnnnnnnJqnnfHInnnnnUMnnAI3nnnnni7nnunnnnnnninnnKt7nnnnnlt7niz1nnnnne53nxPERxMTOnyyxLK03GCniMnnnGCiiMnnnGCGiZnnnvCCiZnnniC0i3Mnnn3CeiYnnn3CIiZnnnnCoimnnnyz5iZnnnyz9inYnnnyzMibCnnTzMi9nnnNzMiYnnntzMiZnnnPGmi3ZnnnmG5iZnnnQ75iMnnnB75iZnnnY75iMnnn97oingnnn97IiMnnn970iYnnn970iMnnn97Ci.3nn57CinZnnni70imnnnTvIiZnnnSv9iZnnnkvMiMnnnsvYi3ZnnnWvYi83nnWv_ikvnnWv_iN4nnzGUiEnnnGGUiio3nn7GUiYnnnvGUifnnnuGriCnnnvGci6nnnzGti3ZnnnoGfimnnnYGfiYnnnRGRiMnnn2GRiZnnn1Gti3ZnnnVG-iMnnnPGMiYnnnKGIiZnnnyGziZnnnizii3mnnnvzniZnnn7zl3Znnn7zp3Qnnn7zK3Mnnnvt~EzLzt~u2zF_._796d0c53-7e52-42b9-a978-a8944ba6c172_._&dfpToken=THP7fd1696fcef06aX5E3e4d3&terminal=PC&loginChannel=208000103001"
            response = self._ha.getstring(url, headers=headers, req_data=postdata)
            if '"errorCode":"badPassword.msg1"' in response:
                self._write_task_back(ECommandStatus.Succeed, "Registered", t,
                                      EBackResult.Registerd)
            elif '"errorCode":"needVerifyCode"' in response:
                self._write_task_back(
                    ECommandStatus.Failed,
                    "Need VerifyCode!",
                    t,
                    EBackResult.CheckRegisterdFail,
                )
            else:
                self._write_task_back(ECommandStatus.Succeed, "Not Registered", t,
                                      EBackResult.UnRegisterd)
        except Exception:
            self._logger.error("Check registration fail: {}".format(
                traceback.format_exc()))
            self._write_task_back(
                ECommandStatus.Failed,
                "Check registration fail",
                t,
                EBackResult.CheckRegisterdFail,
            )
        return

    def _get_orders(self):
        """Fetch the order history, yielding one ISHOPPING_ONE per order."""
        headers = """
        Accept: text/html, */*; q=0.01
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9
        Cache-Control: no-cache
        Connection: keep-alive
        Host: order.suning.com
        Pragma: no-cache
        sec-ch-ua: "Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"
        sec-ch-ua-mobile: ?0
        Sec-Fetch-Dest: empty
        Sec-Fetch-Mode: cors
        Sec-Fetch-Site: same-origin
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
        X-Requested-With: XMLHttpRequest
        """
        thistime = time.strftime("%Y-%m-%d")
        patorderlist = re.compile(r'<div class="table-list">')
        page = 1
        while True:
            try:
                url = f"https://order.suning.com/order/queryOrderList.do?transStatus=&pageNumber={page}&condition=&startDate=2009-01-01&endDate={thistime}&orderType="
                html = self._ha.getstring(url, headers=headers, timeout=10)
                orderlist = patorderlist.search(html)
                if orderlist:
                    soup = BeautifulSoup(html, "lxml")
                    orders = soup.select(".table-list .table-box")
                    for order in orders:
                        try:
                            dic1 = {}
                            patid = re.compile(r'id="table_box_(.*?)"', re.S)
                            orderid = patid.findall(str(order))[0]
                            ordertime = (order.select_one(".item span").get_text()
                                         + " " + "00:00:00")
                            dic1["shop"] = order.select(".item span")[1].get_text()
                            dic1["rowspan"] = order.select_one(".total-price").get("rowspan")
                            dic1["price"] = order.select_one(".total-price span").get_text()
                            # key "含运费" = "shipping included"
                            dic1["含运费"] = order.select_one(".total-price em").get_text()
                            dic1["status"] = order.select_one(".state .opt-item").get_text()
                            dic1["contact"] = (order.select_one(".tax-tip")
                                               .get_text(" ")
                                               .replace("\n", "").replace("\r", ""))
                            dic = []
                            for item in order.select("table .order-info"):
                                di = {
                                    "title": item.select_one('[name="pname_"]')["title"],
                                    "price": item.select_one(".price span").get_text(),
                                    "amount": item.select_one(".amount").get_text().strip(),
                                }
                                dic.append(di)
                            dic1["goods"] = dic
                            res_one = ISHOPPING_ONE(self.task, self._appcfg._apptype,
                                                    self.userid, orderid)
                            res_one.ordertime = ordertime
                            res_one.append_orders(dic1)
                            res_one.host = "www.suning.com"
                            yield res_one
                        except Exception:
                            self._logger.error(
                                f"Parser order error\nerr:\n{traceback.format_exc()}")
                            continue
                    time.sleep(1)
                    page += 1
                else:
                    break
            except Exception:
                self._logger.error("{} got order fail: {}".format(
                    self.userid, traceback.format_exc()))
                break  # avoid spinning forever on a persistently failing page

    def _get_profile(self):
        """Fetch the member profile (currently just the nickname)."""
        try:
            url = "http://my.suning.com/msi2pc/memberInfo.do"
            headers = """
            Accept: application/json, text/javascript, */*; q=0.01
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: my.suning.com
            Pragma: no-cache
            Referer: http://my.suning.com/
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
            X-Requested-With: XMLHttpRequest
            """
            r = self._ha.getstring(url, headers=headers, timeout=10)
            rd = json.loads(r)
            nickname = rd.get("nickName")
            res = PROFILE(self._clientid, self.task, self._appcfg._apptype,
                          self.userid)
            res.nickname = nickname
            yield res
        except Exception:
            self._logger.error("{} got profile fail: {}".format(
                self.userid, traceback.format_exc()))
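# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the call pattern the
# scheduler presumably follows for this spider. task, appcfg and clientid are
# assumed to be supplied by the framework and are not constructed here.
def _demo_suning(task, appcfg, clientid):
    spider = SpiderSuning(task, appcfg, clientid)
    if spider._cookie_login():               # validates task.cookie, fills userid
        for order in spider._get_orders():   # generator: one ISHOPPING_ONE per order
            print(order)
        for profile in spider._get_profile():
            print(profile)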
class Seebug(AutoPluginBase):
    """Crawler for cn.0day.today (the class name 'Seebug' is historical)."""

    tasktype = EAutoType.EXPDB

    def __init__(self):
        AutoPluginBase.__init__(self)
        self.ha = HttpAccess()
        self._get_cookie()

    @staticmethod
    def _page_headers(referer):
        """Common browser-like headers; only the referer varies between requests."""
        return f"""
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        accept-language: zh-CN,zh;q=0.9
        cache-control: no-cache
        pragma: no-cache
        referer: {referer}
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"""

    def _get_cookie(self):
        """Click through the 0day.today consent page with headless Chrome and
        harvest the resulting cookies into self.ha."""
        try:
            ic = False  # never set to True, so the success log below is effectively disabled
            chrome_options = ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            chrome_options.add_argument('blink-settings=imagesEnabled=false')
            # chrome_options.add_argument('--no-sandbox')
            driver = webdriver.Chrome(chrome_options=chrome_options)
            success = False
            check_num = 1
            while True:
                try:
                    driver.get('https://cn.0day.today/')
                    time.sleep(5 * check_num)  # wait a little longer on each retry
                    # The consent form's agree button.
                    driver.find_element_by_css_selector(
                        'body > div > div.agree > div:nth-child(9) > div:nth-child(3) > form > input'
                    ).click()
                    success = True
                    break
                except Exception:
                    check_num += 1
                    if check_num == 4:
                        break
            if success:
                cookies = driver.get_cookies()
                l_cookie = ''
                for cookie in cookies:
                    l_cookie = l_cookie + cookie['name'] + '=' + cookie['value'] + '; '
                if ic:
                    self._logger.info('Got cookie success!')
                self.ha._managedCookie.add_cookies('0day.today', l_cookie)
            else:
                self._logger.info('Got cookie fail!')
            driver.close()
        except Exception:
            self._logger.error('Got cookie fail: {}'.format(traceback.format_exc()))

    def get_bug(self):
        # Re-acquire the cookie until the platform index no longer shows the
        # consent page ("是的我同意" = "yes, I agree") or the Cloudflare interstitial.
        failnum = 0
        while True:
            url = 'https://cn.0day.today/platforms'
            headers = """
            Host: cn.0day.today
            Connection: keep-alive
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
            Accept-Language: zh-CN,zh;q=0.9
            """
            html = self.ha.getstring(url, headers=headers)
            if "value='是的我同意'" in html or 'Just a moment...' in html:
                failnum += 1
                if failnum > 3:
                    self._logger.error('Requests fail over 3 times!')
                    return
                self._logger.info('Cookie expired!')
                self._get_cookie()
            else:
                break
        soup = BeautifulSoup(html, 'lxml')
        tables = soup.select('.category_title a')
        for a in tables:
            href = a.attrs['href']
            if href == '/platforms' or href == '/webapps':
                continue
            url0 = 'https://cn.0day.today' + href
            page = 0
            last_url = None
            while True:
                page += 1
                url = url0 + '/' + str(page)
                html = self.ha.getstring(url, headers=self._page_headers(url0))
                soup = BeautifulSoup(html, 'lxml')
                exploits = soup.select('.ExploitTableContent')
                for exploit in exploits:
                    d_href = exploit.select_one('h3 a').attrs['href']
                    id = d_href.split('/')[-1]
                    # Listings are newest first: the first already-stored id
                    # means everything older was crawled before.
                    if self.is_data_unique(str(id) + '0day'):
                        return
                    name = exploit.select_one('h3 a').get_text()
                    # "你可以免费使用此漏洞利用" = "you may use this exploit for
                    # free"; paid exploits are skipped entirely.
                    if '你可以免费使用此漏洞利用' in str(exploit):
                        detail, referer = self.get_description(href, url)
                        if detail:
                            description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{url}\n'
                            self.write_text_string(description, detail, 'iscan_expdb_doc')
                    else:
                        continue
                    # Reorder "DD-MM-YYYY" into "YYYY-MM-DD".
                    date = exploit.select_one('.td a').get_text()
                    date_d = date.split('-')[0]
                    date_y = date.split('-')[-1]
                    date = date_y + date.replace(date_d, '').replace(date_y, '') + date_d
                    # Note: looked up on the whole page, not the individual row.
                    verified = soup.select_one('.tips_verified_')
                    if verified:
                        verified = 0
                    else:
                        verified = 1
                    # "安全风险级别" = "security risk level"; map it onto 1..3.
                    level_t = substring(str(exploit), "class='tips_risk_color_",
                                        "'>安全风险级别")
                    if level_t in ['0', '1']:
                        level = 1
                    elif level_t == '2':
                        level = 2
                    else:
                        level = 3
                    res = ExpDB(name, '0day', id, date, verified)
                    res.level = level
                    res, poc, url = self.get_detail(id, referer, res)
                    description = f'datasource: 0day\nid: {id}\nname: {name}\nurl:{url}\n'
                    self.write_text_string(description, poc, 'iscan_expdb_exp')
                    self.write_text(res, 'iscan_expdb')
                    self.store_data_unique(str(id) + '0day')
                if not last_url:
                    last_url = 'https://cn.0day.today' + soup.select('.pages a')[-1].attrs['href']
                if last_url == url:
                    break

    def get_description(self, href, referer):
        """Fetch the exploit's description page; returns (description, url)."""
        try:
            d_url = 'https://cn.0day.today/exploit' + href
            html = self.ha.getstring(d_url, headers=self._page_headers(referer))
            description = ''
            # "描述" = the "description" section header on the page.
            if "<div class='td'>描述</div>" in html:
                try:
                    description = re.findall(
                        r"<div class='td'>描述</div>.*?>(.*?)</div>", html, re.S)[0]
                except Exception:
                    self._logger.error('Get description fail: {}'.format(
                        traceback.format_exc()))
            return description, d_url
        except Exception:
            self._logger.error(f'Description fail:{traceback.format_exc()}')

    def get_detail(self, id, referer, res):
        """Fetch the exploit detail page; fills res and returns (res, poc, url)."""
        try:
            e_url = 'https://cn.0day.today/exploit/' + id
            e_html = self.ha.getstring(e_url, headers=self._page_headers(referer))
            e_soup = BeautifulSoup(e_html, 'lxml')
            l1 = e_soup.select(
                "[style='float:left; width:150px; overflow:hidden; margin:5px 0px 0px 0px;']")
            author = {'name': l1[0].get_text()}
            target = {'type': l1[1].get_text(), 'platform': l1[2].get_text()}
            tags = [self.tag_mapping(target['type'])]
            l3 = e_soup.select("[style='float:left; margin:5px 0px 0px 0px;']")
            code = [{'code_type': '0day-ID', 'code': l3[0].get_text()}]
            try:
                # Separate dict so the CVE entry does not overwrite the 0day-ID entry.
                code.append({'code_type': 'cve', 'code': l3[1].get_text(' ')})
            except Exception:
                pass
            res.tags = tags
            res.target = target
            res.author = author
            res.code = code
            poc = e_soup.select_one('pre').get_text()
            return res, poc, e_url
        except Exception:
            self._logger.error(f'ID: {id} get detail fail:{traceback.format_exc()}')

    def start(self):
        self.get_bug()
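# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): running the
# cn.0day.today crawler standalone. A local chromedriver binary is required,
# since _get_cookie() bootstraps the consent cookie with headless Chrome.
def _demo_0day():
    crawler = Seebug()
    crawler.start()  # walks every platform category until a stored id is hit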
def __init__(self):
    ScoutPlugBase.__init__(self)
    self.ha = HttpAccess()
    self.proxy_List = []
    self.is_first = True
    self.cookie = 'CGIC=InZ0ZXh0L2h0bWwsYXBwbGljYXRpb24veGh0bWwreG1sLGFwcGxpY2F0aW9uL3htbDtxPTAuOSxpbWFnZS93ZWJwLGltYWdlL2FwbmcsKi8qO3E9MC44LGFwcGxpY2F0aW9uL3NpZ25lZC1leGNoYW5nZTt2PWIz; HSID=AJAuKs_OF4zjl_QiH; SSID=AOt5d7crf8ErXjqt5; APISID=ywI9lJQCGtyX_UgX/AVPHvH0i8rRjLpy3q; SAPISID=qPfZ04-88QBXo551/Aw4J_YiM8bTsz4m9H; CONSENT=YES+CN.zh-CN+; SEARCH_SAMESITE=CgQI3o0B; _gcl_au=1.1.423014040.1568171077; NID=188=gkZ2tXD8e7EWvfFwVQ9g0g5Ny1h2S1gY62TzUODI6ypQxzhcyqwfRgkZxnuA9c1kHGpLn9XDvmuJN4_kBEPJYoLSmv_USmqai_6IYBQ9RxxBYr5HXlDo60sFVnUMFqU9L9Im9jeVWz8KjdzAWdMaXDwAlUN7VZbILOZ8qm_0etxVhmAyGNvBzBI9C3ZvCG3M6OV6Gc3e0QoTqtsq87egQ8cjl-riBP-A9hPp8v5_SCohQA; SID=oge9GlNhzXILiY9MW_P2brb9HPE0noiUCwKqq4Z-w4IFcUYyBLodJCIIfQl8R8IqgQclJQ.; 1P_JAR=2019-09-20-06; SIDCC=AN0-TYu9Bf7Mrwg4TJmxordC1vli8GeqPk_MiHeE-3rahVe8d0bVfvQcfHx9GiSL3t_0JBmJQd0'
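# Note (not part of the original source): this __init__ belongs to a
# ScoutPlugBase subclass whose class header is not included in this excerpt.
# It only stores a captured Google session cookie; judging by the pattern the
# other spiders here use, the subclass presumably registers it before making
# requests with something like:
#     self.ha._managedCookie.add_cookies('google.com', self.cookie)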
def __init__(self, task: IscoutTask, loggername: str = None):
    ScoutPlugBase.__init__(self, loggername=loggername)
    if not isinstance(task, IscoutTask):
        raise Exception("Invalid IscoutTask")
    self._dtools = dtools
    self.task: IscoutTask = task
    self._ha: HttpAccess = HttpAccess()
    self._userid: str = None  # the site's unique identifier for this user
    self._account: str = None  # account name that can be used to log in
    self._username: str = None  # user nickname
    self._globaltelcode: str = None  # international dialing code
    self._phone: str = None  # phone number
    self.phone: str = None
    self._url: str = None
    self._host: str = None
    self._cookie: str = None
    self.is_new_facebook = False  # whether this account sees the new facebook UI

    # web-needed fields
    self._pc = None
    self._rev = None
    self.lsd = None
    self._req = helper_num.MakeNumber(FbBase._msgr_charsets, 20)
    self.fb_dtsg = None
    self.fb_dtsg_ag = None
    self.jazoest = None
    self._spin_r = None
    self._spin_t = None
    self._spin_b = None
    self.hsi = None
    self._s = None
    self.docid = None
    self.ajaxpipe_token = None  # the new UI no longer has this parameter
    self.quickling_ver = None
    self.docid_profile = None
    self.docid_contact = None
    self.docid_group = None
    self.homepage = None

    # cache of every resource JS script from the init pages, used to look up the various docids
    self._jspages: dict = {}
    self._jspages_listpage = None
    self._jspages_itemurls: dict = {}
    self._jspages_ok: bool = False
    self._jspages_locker = threading.Lock()

    # sms login contract fields...
    self.hash_ = None
    self.sms_redir = None

    # data
    self.is_messenger_only_user: bool = False  # whether this is a messenger-only user
    self.is_deactived_allowed_on_messenger: bool = False  # deactivated on facebook but still allowed on messenger?

    # The fields below have no multithreading concurrency issues for now;
    # add a lock later if that changes.
    # self._contacts: dict = {}   # friends, keyed by friend userid
    # self._chatlogs: dict = {}   # chat logs, keyed by friend userid
    # self._groups: dict = {}     # groups, keyed by group id
    # self._resources: dict = {}  # resources; dedup against db for incremental download
    self._exist_msgtypes: dict = {}  # message types seen so far, for debugging
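# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): the constructor above
# rejects anything that is not an IscoutTask, so callers must pass a real task.
# 'FbBase' is inferred from the FbBase._msgr_charsets reference inside __init__;
# 'scheduler_task' is a hypothetical stand-in for the framework-supplied task.
def _demo_facebook(scheduler_task: IscoutTask):
    fb = FbBase(scheduler_task)  # raises if scheduler_task is not an IscoutTask
    return fb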
def setUp(self):
    # Runs before each test: lazily create the shared HttpAccess instance.
    # getattr guards the first run, when self._ha does not exist yet.
    if not isinstance(getattr(self, '_ha', None), HttpAccess):
        self._ha = HttpAccess()
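# Illustrative sketch (not part of the original source): a minimal
# unittest.TestCase wired to the setUp above; the class and test names here
# are assumptions, only the fixture pattern is taken from the original.
import unittest

class HttpAccessFixtureTest(unittest.TestCase):
    def setUp(self):
        # lazily create the shared HttpAccess instance before each test
        if not isinstance(getattr(self, '_ha', None), HttpAccess):
            self._ha = HttpAccess()

    def test_fixture_ready(self):
        # setUp guarantees the fixture exists before each test method runs
        self.assertIsInstance(self._ha, HttpAccess)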