def preparation(self, resp):
    """Post the report viewer form so the report is returned as CSV.

    Locates the ``aspnetForm`` element in *resp*, overrides the report
    toolbar / viewer fields with the values for a
    ``saveToWindow=format:csv;`` postback and yields a single
    ``SplashFormRequest`` (``execute`` endpoint) whose response is handled
    by ``self.get_station_data``.

    The JSON-valued fields are written with single-quoted Python literals:
    nesting unescaped double quotes inside a double-quoted literal is a
    syntax error.

    NOTE(review): ``script`` is not defined in this function; presumably a
    module-level Lua source string -- confirm.
    """
    form = resp.xpath(u'//*[@id="aspnetForm"]')
    form_id = form.xpath(u"./@id").extract_first()

    # Shared control-name prefixes. The "$" form is used for most field
    # names, the "_" form only for the two "*_VI" fields.
    tab = u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl"
    menu = tab + u"$uxReportToolbar$Menu"
    menu_id = menu.replace(u"$", u"_")
    page_number = menu + u"$ITCNT5$PageNumber"
    save_format = menu + u"$ITCNT11$SaveFormat"

    formdata = {
        u"__EVENTARGUMENT": u"saveToWindow=format:csv;",
        u"__EVENTTARGET": tab + u"$uxReportViewer",
        menu_id + u"_ITCNT5_PageNumber_VI": u"1",
        page_number + u"$DDD$L": u"1",
        page_number: u"1",
        menu + u"$ITCNT6$PageCount": u"25",
        menu_id + u"_ITCNT11_SaveFormat_VI": u"csv",
        save_format + u"$DDD$L": u"csv",
        save_format: u"Csv",
        page_number + u"$DDDState":
            u'{"windowsState":"0:0:-1:0:0:0:-10000:-10000:1:0:0:0"}',
        page_number + u"$DDD$L$State": u'{"CustomCallback":""}',
        save_format + u"$DDDState":
            u'{"windowsState":"0:0:-1:430:140:1:68:165:1:0:0:0"}',
        tab: u'{"activeTabIndex":1}',
        menu: u'{"selectedItemIndexPath":"","checkedState":""}',
        tab + u"$uxReportViewer":
            u'{"drillDown":{},"parameters":{},"cacheKey":"","currentPageIndex":0}',
        tab + u"$uxErrorGrid":
            u'{"keys":[],"callbackState":"BwQHAwIERGF0YQcnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABwAHAAcAAgtGb3JtYXRTdGF0ZQcAAgVTdGF0ZQc/BwAHAAcABwAHAAIABQAAAIAJAgtUaW1lT2ZFcnJvcgcACQIAAgADBwQCAAcAAgEHAAcAAgACAQcABwAHAAcAAg1TaG93RmlsdGVyUm93CgIB","selection":""}',
    }

    yield SplashFormRequest.from_response(
        response=resp,
        formid=form_id,
        formdata=formdata,
        callback=self.get_station_data,
        # The postback is triggered through __EVENTTARGET, not a button.
        dont_click=True,
        endpoint=u"execute",
        cache_args=[u"lua_source"],
        args={
            u"http_method": u"POST",
            u"headers": {
                u"Content-Type": u"application/x-www-form-urlencoded",
            },
            u"lua_source": script,
        },
    )
def parse(self, response):
    """Submit the login form through Splash once credentials are available.

    When ``self.credentials()`` is falsy an error is logged and nothing is
    yielded. Otherwise one ``SplashFormRequest`` is yielded, built from the
    form in *response*, filled from the spider's login attributes, run on
    the ``execute`` endpoint with the ``init.lua`` script, and handled by
    ``self.parse_login``.
    """
    if not self.credentials():
        self.logger.error('add the credentials first!')
        return

    login_fields = {
        'txtUser': self.txtUser,
        'txtPassword': self.txtPassword,
        'txtDependencia': self.txtDependencia,
        'ComboTipoDep': self.ComboTipoDep,
        'btnAceptar': self.btnAceptar,
    }
    lua_source = self.getFileContentAsString(self.lua_dir + 'init.lua')
    splash_args = {'html': 1, 'lua_source': lua_source, 'wait': 5}

    yield SplashFormRequest.from_response(
        response,
        formdata=login_fields,
        callback=self.parse_login,
        endpoint='execute',
        cache_args=['lua_source'],
        session_id='dummy',
        args=splash_args,
    )
def parse(self, response):
    """Log in by submitting the page's form together with its CSRF token.

    Reads the value of the ``csrf_token`` input from *response* and yields
    one ``SplashFormRequest`` for the form matched by ``//form``; the
    result is handled by ``self.after_login``.
    """
    token_xpath = '//input[@name="csrf_token"]/@value'
    login_fields = {
        'csrf_token': response.xpath(token_xpath).get(),
        'username': '******',
        'password': '******',
    }
    yield SplashFormRequest.from_response(
        response,
        formxpath='//form',
        formdata=login_fields,
        callback=self.after_login,
    )
def parse(self, response):
    """Build the login request for the ``loginform`` form in *response*.

    Returns ``None`` (after logging an error) when no password is
    configured. The check is a truthiness test so that ``None`` is treated
    like the empty string instead of being passed on as a form value.

    Otherwise returns a ``SplashFormRequest`` that fills ``email`` and
    ``pass`` and is handled by ``self.after_login``.
    """
    if not self.password:
        self.logger.error('first add a password')
        return None

    return SplashFormRequest.from_response(
        response,
        formdata={
            'email': self.user_name,
            'pass': self.password,
        },
        callback=self.after_login,
        formid='loginform',
        dont_process_response=True,
    )
def login(self, response):
    """Yield the Splash form request that submits to ``self.login_url``.

    The request runs on the ``execute`` endpoint; ``self.after_login``
    handles the response and ``self.error_parse`` handles failures.
    """
    # Use scrapy-splash's SplashFormRequest to submit the form parameters.
    form_fields = {
        'user_agent': '',
        'cookie': '',
    }
    splash_args = {
        'wait': 30,
        'lua_source': lg,
        'proxy': 'http://:1080',
        'search_key': self.search_key,
    }
    yield SplashFormRequest.from_response(
        response=response,
        url=self.login_url,
        endpoint='execute',
        formdata=form_fields,
        args=splash_args,
        callback=self.after_login,
        errback=self.error_parse,
    )
def login(self, response):
    """Yield the Splash form request that posts the login credentials.

    The form is submitted to ``self.login_url`` on the ``execute``
    endpoint; ``self.after_login`` handles the response and
    ``self.error_parse`` handles failures.
    """
    # Use scrapy-splash's SplashFormRequest to submit the form parameters
    # (user, password).
    credentials = {
        'email': user,
        'password': password,
    }
    splash_args = {
        'wait': 30,
        'lua_source': log_lua_script,
        'user_name': user,
        'user_password': password,
        'proxy': 'http://:1080',
    }
    yield SplashFormRequest.from_response(
        response=response,
        url=self.login_url,
        formdata=credentials,
        endpoint='execute',
        args=splash_args,
        callback=self.after_login,
        errback=self.error_parse,
    )
def test_form_request_from_response():
    """Check ``SplashFormRequest.from_response`` on a GET form.

    Builds a form with two submit buttons, clicks the second one while
    overriding the hidden field ``two``, and verifies that the resulting
    request is a GET whose query string (also mirrored into the Splash
    ``url`` argument) carries exactly the expected fields.
    """
    # Copied from scrapy tests (test_from_response_submit_not_first_clickable)

    # cgi.parse_qs was removed in Python 3.8 (and the cgi module in 3.13);
    # the urllib implementation is the drop-in replacement.
    try:
        from urllib.parse import parse_qs
    except ImportError:  # Python 2
        from urlparse import parse_qs

    def _buildresponse(body, **kwargs):
        kwargs.setdefault('body', body)
        kwargs.setdefault('url', 'http://example.com')
        kwargs.setdefault('encoding', 'utf-8')
        return HtmlResponse(**kwargs)

    response = _buildresponse(
        """<form action="get.php" method="GET">
        <input type="submit" name="clickable1" value="clicked1">
        <input type="hidden" name="one" value="1">
        <input type="hidden" name="two" value="3">
        <input type="submit" name="clickable2" value="clicked2">
        </form>""")
    req = SplashFormRequest.from_response(
        response,
        formdata={'two': '2'},
        clickdata={'name': 'clickable2'})

    assert req.method == 'GET'
    assert req.meta['splash']['args']['url'] == req.url

    # keep_blank_values=True, as in the original call.
    fs = parse_qs(req.url.partition('?')[2], True)
    assert fs['clickable2'] == ['clicked2']
    assert 'clickable1' not in fs
    assert fs['one'] == ['1']
    assert fs['two'] == ['2']
def login(self, response):
    """Return the Splash form request that submits the login form ``fm``.

    Extra request headers are parsed from ``self.rpt_mp.headers`` (JSON);
    an undecodable value falls back to no extra headers. The form values
    come from ``fill_login_form`` using the username and password stored
    in ``response.meta``. The response is handled by ``self.after_login``.
    """
    logging.debug('Screenshot: %s', response.data['png'])

    try:
        extra_headers = json.loads(self.rpt_mp.headers)
    except json.decoder.JSONDecodeError:
        extra_headers = {}

    request_meta = response.meta
    login_values, _url, _method = fill_login_form(
        response.url,
        response.text,
        request_meta['username'],
        request_meta['password'],
    )

    # because following request is a new Splash request
    for splash_key in ('splash', '_splash_processed'):
        request_meta.pop(splash_key)

    return SplashFormRequest.from_response(
        response,
        endpoint='render.json',
        args=self.splash_args,
        headers=extra_headers,
        dont_filter=True,
        formname='fm',
        formdata=login_values,
        meta=request_meta,
        callback=self.after_login,
    )
def parse(self, response):
    """Classify a crawled page, emit its item and schedule follow-ups.

    Steps, in order:

    1. Return immediately if ``self.link_extractor`` does not match the
       response URL.
    2. Collect page metadata (flags copied from ``response.meta``, form
       metadata from formasaurus, a screenshot).
    3. Match the page against each entry of ``self.pages_cfg`` using its
       optional ``urlRegex`` / ``contentRegex``; the first match supplies
       ``urlExtractionInfo`` and ``formsInfo``.
    4. For a valid page: yield the item, then (if ``FOLLOW_LINKS`` is set)
       yield requests for pagination links, allowed follow URLs, onclick
       URLs, iframes and the configured form submissions.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    if not self.link_extractor.matches(response.url):
        return

    # Merged into the meta of every request created via ``request`` below.
    request_meta = {
        'from_search': response.meta.get('is_search'),
        'extracted_at': response.url,
    }

    def request(url, meta=None, **kwargs):
        # Thin wrapper around self.make_request that adds request_meta.
        meta = meta or {}
        meta.update(request_meta)
        return self.make_request(url, meta=meta, **kwargs)

    # Not using formasaurus for Inferlink forms processing
    # (its result is only used for the ``forms`` metadata entry below).
    forms = (formasaurus.extract_forms(response.text) if response.text else [])

    metadata = dict(
        is_page=response.meta.get('is_page', False),
        is_onclick=response.meta.get('is_onclick', False),
        is_iframe=response.meta.get('is_iframe', False),
        is_search=response.meta.get('is_search', False),
        from_search=response.meta.get('from_search', False),
        extracted_at=response.meta.get('extracted_at', None),
        depth=response.meta.get('depth', None),
        priority=response.request.priority,
        forms=[meta for _, meta in forms],
        screenshot=self._take_screenshot(response),
    )

    # If there is no configuration, self.pages_cfg can be empty and the
    # loop below does nothing; in that case the page counts as valid and
    # there is no urlExtractionInfo.
    page_valid = True
    url_extract_info = None
    # From here on ``forms`` means the configured formsInfo of the matched
    # page, not the formasaurus result above.
    forms = []

    # page classification
    for pg in self.pages_cfg:
        page_name = pg.get('pageName')

        # check if the page gets classified
        url_regex = pg.get('urlRegex')
        content_regex = pg.get('contentRegex')
        page_valid = True
        if url_regex:
            if not search_re(url_regex, response.url):
                page_valid = False
        if content_regex:
            # NOTE(review): the body is decoded as UTF-8 regardless of the
            # response encoding -- confirm this is intended.
            if not search_re(content_regex, response.body.decode("utf-8")):
                page_valid = False
        if page_valid:
            # First matching page configuration wins.
            url_extract_info = pg.get('urlExtractionInfo')
            forms = pg.get('formsInfo')
            break

    if not page_valid:
        print(
            'the page did not pass through any of the specified page classifiers',
            response.url)
    else:
        # do rest of the processing
        follow_urls = {
            link_to_url(link)
            for link in self.link_extractor.extract_links(response)
            if not self._looks_like_logout(link, response)
        }
        print('Size of follow-urls: ', len(follow_urls))

        yield self.text_cdr_item(response,
                                 follow_urls=follow_urls,
                                 metadata=metadata)

        if not self.settings.getbool('FOLLOW_LINKS'):
            return

        if self.settings.getbool('PREFER_PAGINATION'):
            # Follow pagination links; pagination is not a subject of
            # a max depth limit. This also prioritizes pagination links because
            # depth is not increased for them.
            with _dont_increase_depth(response):
                for url in self._pagination_urls(response):
                    yield request(url, meta={'is_page': True})

        # url extraction processing
        allowed_follow_urls = list()
        if url_extract_info:
            url_extract_method = url_extract_info['extractionMethod']
            if url_extract_method == 'inferlink':
                # Use the explicitly configured URLs as-is.
                extract_urls = url_extract_info.get('urls')
                for extract_url in (extract_urls or []):
                    allowed_follow_urls.append(extract_url)
            else:
                # Filter the extracted links: a deny match always rejects;
                # if allow patterns exist, at least one must match.
                url_regexes_allow = url_extract_info.get('urlRegexesAllow')
                url_regexes_deny = url_extract_info.get('urlRegexesDeny')
                for follow_url in (follow_urls or []):
                    follow = True
                    for url_regex_deny in (url_regexes_deny or []):
                        if search_re(url_regex_deny, follow_url):
                            follow = False
                            break
                    if follow and url_regexes_allow:
                        follow = False
                        for url_regex_allow in (url_regexes_allow or []):
                            if search_re(url_regex_allow, follow_url):
                                follow = True
                                break
                    if follow:
                        allowed_follow_urls.append(follow_url)
        else:
            # No extraction rules: follow every extracted link.
            allowed_follow_urls = list(follow_urls)
        print('Number of urls to be followed - ', len(allowed_follow_urls))

        # Follow all the allowed in-domain links.
        # Pagination requests are sent twice, but we don't care because
        # they will be filtered out by a dupefilter.
        for url in allowed_follow_urls:
            yield request(url)

        # urls extracted from onclick handlers
        for url in get_js_links(response):
            # Candidates that do not look like URLs get a lower priority.
            priority = 0 if _looks_like_url(url) else -15
            url = response.urljoin(url)
            yield request(url, meta={'is_onclick': True}, priority=priority)

        # go to iframes
        for link in self.iframe_link_extractor.extract_links(response):
            yield request(link_to_url(link), meta={'is_iframe': True})

        # forms processing
        for form in (forms or []):
            # ``identity`` is a JSON object whose entries are passed as
            # keyword arguments to from_response.
            form_identity = json.loads(form['identity'])
            form_method = form['method']
            form_params_list = form['params']

            kwargs = {}
            if self.use_splash:
                kwargs.update(self.setup_splash_args())
            meta = {}
            meta['avoid_dup_content'] = True
            meta.update(request_meta)
            kwargs.update(form_identity)

            for form_params in form_params_list:
                # SplashRequest for all the params
                print('===== Submitting FORM again ========' + json.dumps(form_params))
                yield SplashFormRequest.from_response(response,
                                                      formdata=form_params,
                                                      method=form_method,
                                                      callback=self.parse,
                                                      meta=meta.copy(),
                                                      **kwargs)
def preparation(self, resp):
    """Post the report viewer form so the report is returned as HTML.

    Locates the ``aspnetForm`` element in *resp*, overrides the report
    toolbar / viewer fields with the values for a
    ``saveToWindow=format:html;`` postback and yields a single
    ``SplashFormRequest`` (``execute`` endpoint) whose response is handled
    by ``self.parse_tags``.

    Fixes relative to the previous revision:

    * JSON-valued fields use single-quoted literals (unescaped nested
      double quotes are a syntax error).
    * ``__EVENTTARGET`` starts with ``ctl00$`` (was ``tl00$``), matching
      the viewer control name used for the ``...$uxReportViewer`` field.
    * The ``...$PageNumber$DDD$L$State`` key uses ``MainContent`` (was
      ``MasinContent``).
    * Leading ``../`` segments of the form action are removed as a prefix;
      ``str.lstrip("../")`` strips any run of ``.`` and ``/`` characters.
    * The unused CSV field dict and ``form_name`` local were removed.

    NOTE(review): ``script`` is not defined in this function; presumably a
    module-level Lua source string -- confirm.
    """
    print("TAGS1", resp.meta)

    form = resp.xpath(u'//*[@id="aspnetForm"]')
    form_id = form.xpath(u"./@id").extract_first()

    # The action is only printed for diagnostics; tolerate its absence.
    form_action = form.xpath(u"./@action").extract_first() or u""
    while form_action.startswith(u"../"):
        form_action = form_action[3:]
    print(form_action)

    # Shared control-name prefixes. The "$" form is used for most field
    # names, the "_" form only for the two "*_VI" fields.
    tab = u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl"
    menu = tab + u"$uxReportToolbar$Menu"
    menu_id = menu.replace(u"$", u"_")
    page_number = menu + u"$ITCNT5$PageNumber"
    save_format = menu + u"$ITCNT11$SaveFormat"

    formdata1 = {
        u"__EVENTARGUMENT": u"saveToWindow=format:html;",
        u"__EVENTTARGET": tab + u"$uxReportViewer",
        tab: u'{"activeTabIndex":1}',
        menu: u'{"selectedItemIndexPath":"","checkedState":""}',
        menu_id + u"_ITCNT5_PageNumber_VI": u"1",
        page_number: u"1",
        page_number + u"$DDDState":
            u'{"windowsState":"0:0:-1:0:0:0:-10000:-10000:1:0:0:0"}',
        page_number + u"$DDD$L$State": u'{"CustomCallback":""}',
        page_number + u"$DDD$L": u"1",
        menu + u"$ITCNT6$PageCount": u"25",
        menu_id + u"_ITCNT11_SaveFormat_VI": u"html",
        save_format: u"Html",
        save_format + u"$DDDState":
            u'{"windowsState":"0:0:-1:430:250:1:68:165:1:0:0:0"}',
        save_format + u"$DDD$L": u"html",
        tab + u"$uxReportViewer":
            u'{"drillDown":{},"parameters":{},"cacheKey":"","currentPageIndex":0}',
        tab + u"$uxErrorGrid":
            u'{"keys":[],"callbackState":"BwQHAwIERGF0YQcnAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABwAHAAcAAgtGb3JtYXRTdGF0ZQcAAgVTdGF0ZQc/BwAHAAcABwAHAAIABQAAAIAJAgtUaW1lT2ZFcnJvcgcACQIAAgADBwQCAAcAAgEHAAcAAgACAQcABwAHAAcAAg1TaG93RmlsdGVyUm93CgIB","selection":""}',
    }

    yield SplashFormRequest.from_response(
        response=resp,
        formid=form_id,
        formdata=formdata1,
        callback=self.parse_tags,
        # The postback is triggered through __EVENTTARGET, not a button.
        dont_click=True,
        endpoint='execute',
        cache_args=['lua_source'],
        args={'lua_source': script},
    )
def parse(self, response):
    """Post the report viewer form requesting page 1 as HTML.

    Yields one ``SplashFormRequest`` built from the form in *response*
    with the toolbar fields set for a ``saveToWindow=format:html;``
    postback; the result is handled by ``self.parse_tags``.

    Fixes relative to the previous revision:

    * ``__EVENTTARGET`` starts with ``ctl00$`` (was ``tl00$``), matching
      the prefix of every other control name in the form data.
    * Two alternative field dicts that were built but never submitted were
      removed; they also contained string literals with unescaped nested
      double quotes, which is a syntax error.
    """
    # Shared control-name prefixes. The "$" form is used for most field
    # names, the "_" form only for the two "*_VI" fields.
    tab = u"ctl00$ctl00$ctl00$MainContent$MainContent$ReportOutputPlaceHolder$uxTabControl"
    menu = tab + u"$uxReportToolbar$Menu"
    menu_id = menu.replace(u"$", u"_")
    page_number = menu + u"$ITCNT5$PageNumber"
    save_format = menu + u"$ITCNT11$SaveFormat"

    fm = {
        u"__EVENTARGUMENT": u"saveToWindow=format:html;",
        u"__EVENTTARGET": tab + u"$uxReportViewer",
        menu_id + u"_ITCNT5_PageNumber_VI": u"1",
        page_number + u"$DDD$L": u"1",
        page_number: u"1",
        menu_id + u"_ITCNT11_SaveFormat_VI": u"html",
        save_format + u"$DDD$L": u"html",
        save_format: u"Html",
        u"DXScript": u"1_232,1_134,1_225,1_169,1_226,1_223,1_155,9_45,1_131,1_217,1_206,1_167,1_175,1_138,1_180,1_166,1_164,1_215,1_170,9_38,9_48,9_46,1_153,1_194,1_196",
        u"DXCss": u"1_33,1_18,0_3879,0_4037,0_4039,0_3877,0_4060,0_3883,0_3885,../Content/bootstrap.min.css,../Styles/Site.css,../images/AgilaireAV.ico",
    }

    yield SplashFormRequest.from_response(
        response,
        formdata=fm,
        callback=self.parse_tags,
        # The postback is triggered through __EVENTTARGET, not a button.
        dont_click=True,
    )
def login(self, response):
    """Yield the form request that submits the ``name`` field.

    The form is located through the XPath of one of its inputs; the
    response is handled by ``self.go_to_listings``.
    """
    input_xpath = '//div[3]//tr[1]/td/input'
    form_fields = {'name': '*****@*****.**'}
    yield SplashFormRequest.from_response(
        response=response,
        formxpath=input_xpath,
        formdata=form_fields,
        callback=self.go_to_listings,
    )