def parse(self, response):
    """Yield one "frmInfo" form submission per calendar day.

    Starting at ``self.start_date`` and advancing one day at a time, a
    ``FormRequest`` is produced whose ``cal_Date`` field is the day formatted
    as ``YYYY-MM-DD``. Iteration stops once the next day would be later than
    today. The first request is always issued, even when ``self.start_date``
    is itself later than today.

    Each response is handled by ``self.parse_item``.
    """
    one_day = timedelta(days=1)
    today = date.today()
    process_date = self.start_date

    while True:
        # from_response() pre-populates the request with the fields already
        # present in the target form of the response, so only the value we
        # want to override has to be supplied here.
        yield FormRequest.from_response(
            response=response,
            formdata={"cal_Date": process_date.strftime('%Y-%m-%d')},
            formname="frmInfo",
            callback=self.parse_item,
        )

        # Advance to the next day.
        process_date += one_day
        if process_date > today:
            break
def parse(self, response):
    """Build the login request from the form at index 1 of ``response``.

    The form is filled with ``self.username`` / ``self.password`` and the
    resulting response is handled by ``self.after_login``.

    Returns a one-element list containing the ``FormRequest``.
    """
    credentials = {'username': self.username, 'password': self.password}
    login_request = FormRequest.from_response(
        response,
        formdata=credentials,
        formnumber=1,
        callback=self.after_login,
    )
    return [login_request]
def _handle_captcha(self, response, callback):
    """Try to solve a captcha page and build the request that submits it.

    Reads the attempt counter (``captcha_solve_try``, default 0) and the
    ``product`` entry from ``response.meta``. Returns ``None`` when
    ``self._solve_captcha`` yields no guess; otherwise returns a
    ``FormRequest`` carrying the guess in the ``field-keywords`` field, with
    the incremented counter and the product stored in its meta.
    """
    # FIXME This is untested and wrong.
    tries = response.meta.get('captcha_solve_try', 0)
    product = response.meta['product']
    product_url = product['url']

    self.log("Captcha challenge for %s (try %d)." % (product_url, tries),
             level=INFO)

    captcha = self._solve_captcha(response)
    if captcha is None:
        self.log(
            "Failed to guess captcha for '%s' (try: %d)." % (
                product_url, tries),
            level=ERROR
        )
        return None

    self.log(
        "On try %d, submitting captcha '%s' for '%s'." % (
            tries, captcha, product_url),
        level=INFO
    )
    request = FormRequest.from_response(
        response,
        formname='',
        formdata={'field-keywords': captcha},
        callback=callback,
    )
    # Carry the retry counter and the product over to the follow-up response.
    request.meta.update(captcha_solve_try=tries + 1, product=product)
    return request
def _handle_captcha(self, response, callback):
    """Try to solve a captcha page and build the request that submits it.

    The attempt counter is read from ``response.meta['captcha_solve_try']``
    (default 0). Returns ``None`` when ``self._solve_captcha`` yields no
    guess; otherwise returns an unfiltered ``FormRequest`` that submits the
    guess in the ``field-keywords`` field and carries a copy of the response
    meta with the counter incremented.
    """
    # FIXME This is untested and wrong.
    attempt = response.meta.get('captcha_solve_try', 0)
    page_url = response.url

    self.log("Captcha challenge for %s (try %d)." % (page_url, attempt),
             level=INFO)

    guess = self._solve_captcha(response)
    if guess is None:
        self.log("Failed to guess captcha for '%s' (try: %d)."
                 % (page_url, attempt),
                 level=ERROR)
        return None

    self.log("On try %d, submitting captcha '%s' for '%s'."
             % (attempt, guess, page_url),
             level=INFO)

    # Shallow copy of the incoming meta with the retry counter bumped.
    next_meta = dict(response.meta, captcha_solve_try=attempt + 1)
    return FormRequest.from_response(
        response,
        formname='',
        formdata={'field-keywords': guess},
        callback=callback,
        dont_filter=True,
        meta=next_meta,
    )
def parse(self, response):
    """Fill in and submit the purchase form found in ``response``.

    The quantity is the last ``<option>`` value of the ``ddrQuantity``
    select (falling back to 1 when it cannot be read); the remaining fields
    come from the cookies of the request that produced ``response``. The
    submitted form's response is handled by ``ZXY_QKYJ_Deal_Spider().parse``.

    Raises:
        KeyError: if one of the expected configuration cookies is missing.
    """
    hxs = HtmlXPathSelector(response)
    cookies = response.request.cookies

    try:
        count = hxs.select(
            u'//select[@name="ddrQuantity"]/option/@value').extract()[-1]
    except Exception:
        # Best effort: any failure to read the select falls back to 1.
        count = 1

    role = cookies[QKYJConst.qkyj_config_Role]
    formdata = {
        u'ddrQuantity': unicode(count),  # purchase quantity
        u'txtReceivingRole': role,  # receiving role
        u'txtSureReceivingRole': role,  # receiving role (confirmation)
        u'txtPhone': cookies[QKYJConst.qkyj_config_tel],  # contact phone
        u'txtQq': cookies[QKYJConst.qkyj_config_QQ],  # QQ
    }

    try:
        # The callback must be passed by keyword: the second positional
        # parameter of from_response() is ``formname``, not ``callback``.
        yield FormRequest.from_response(
            response,
            callback=ZXY_QKYJ_Deal_Spider().parse,
            formdata=formdata,
        )
    except Exception as e:
        self.log(u'%s' % str(e), log.INFO)
def parse(self, response):
    """Submit the form named "loginform" with the account credentials.

    The cookies of the request that produced ``response`` are forwarded, and
    the login response is handled by ``FOST_Login_Spider().parse``.
    """
    cookies = response.request.cookies
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to settings or spider arguments.
    name = u"535521469"
    pwd = u"Corleone1016@"
    # ``callback`` is the keyword Request accepts; the previous ``parse=``
    # keyword is unknown to Request and raised TypeError.
    return FormRequest.from_response(
        response,
        formname=u"loginform",
        formdata={u"u": name, u"p": pwd},
        callback=FOST_Login_Spider().parse,
        cookies=cookies,
    )
def over_under_trends_get(self, response):
    """Re-submit the form in ``response``, keeping the ``team`` meta value.

    The returned ``FormRequest`` is handled by ``self.over_under_trends``.
    """
    form_request = FormRequest.from_response(
        response,
        callback=self.over_under_trends,
    )
    # Pass the team along so the callback can read it.
    team = response.meta['team']
    form_request.meta.update(team=team)
    return form_request
def log_in(self, response):
    """Build the login request from the form with id ``login_form``.

    The username and password are filled in from ``self.login_user`` and
    ``self.login_pass``, the request is sent to ``self.login_url``, and the
    response is handled by ``self.after_login``.
    """
    credentials = {
        "email": self.login_user,
        "pass": self.login_pass,
    }
    return FormRequest.from_response(
        response,
        url=self.login_url,
        formid="login_form",
        formdata=credentials,
        callback=self.after_login,
    )
def parse(self, response):
    """Return the login request as a single-item list.

    The form at index 1 of ``response`` is submitted with the spider's
    ``username`` and ``password``; ``self.after_login`` receives the result.
    """
    form_values = dict(username=self.username, password=self.password)
    request = FormRequest.from_response(
        response,
        formnumber=1,
        formdata=form_values,
        callback=self.after_login,
    )
    return [request]
def parse(self, response):
    """Log in unless the response body already contains 'Player List'.

    When the marker text is present the response is handed straight to
    ``self.after_login``. Otherwise a one-element list is returned holding
    an unfiltered login ``FormRequest`` (built without clicking any form
    control) whose response goes to ``self.parse_page``.
    """
    if 'Player List' not in response.body:
        credentials = {'login': YAHOO_USERNAME, 'passwd': YAHOO_PASSWORD}
        login_request = FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.parse_page,
            dont_filter=True,
            dont_click=True,
        )
        return [login_request]
    return self.after_login(response)
def parse(self, response):
    """Yield one postback request per tree-view link in ``response``.

    Every ``__doPostBack('...')`` argument string found in the anchors under
    ``.AspNet-TreeView-Root`` is URL-unquoted and split on ``','``; the first
    two pieces become ``__EVENTTARGET`` and ``__EVENTARGUMENT`` of a form
    submission handled by ``self.takeEachParty``.
    """
    postback_pattern = r'__doPostBack\(\'(.*)\'\)'
    tree_links = response.css('.AspNet-TreeView-Root a')
    for raw_args in tree_links.re(postback_pattern):
        pieces = urllib.parse.unquote(raw_args).split("','")
        postback_fields = {
            '__EVENTTARGET': pieces[0],
            '__EVENTARGUMENT': pieces[1],
        }
        yield FormRequest.from_response(
            response=response,
            formdata=postback_fields,
            callback=self.takeEachParty,
            dont_click=True,
        )
def parse(self, response):
    """Submit the ``loginForm`` form when the page contains one.

    If a form with id ``loginForm`` is present, a ``FormRequest`` is yielded
    with ``self.member`` / ``self.pin`` filled in (and ``rToken`` set to the
    string ``'null'``); its response is handled by ``self.after_login``.
    Progress is logged at INFO level.
    """
    self.log("Parsing", level=logging.INFO)

    login_forms = response.xpath('//form[@id="loginForm"]')
    if login_forms:
        self.log("Found login form", level=logging.INFO)
        login_fields = {
            'cust': self.member,
            'pin': self.pin,
            'rToken': 'null',
        }
        yield FormRequest.from_response(
            response,
            formid='loginForm',
            formdata=login_fields,
            callback=self.after_login,
        )

    self.log("Done parsing main page", logging.INFO)
def parse(self, response): print "FFFFFFFFFFFFFFF" fd= { "accountLocked":"null", "auth_mode":"BASIC", "orig_url":"null", "password":"******", "user":"******", "userId":"null", "userName":"******", "userNameCB":"on", } return FormRequest.from_response(response, formdata=fd,callback=self.navi_swarm)
def login(self, response=None):
    """Drive the login flow; this method is also its own callback.

    * Called without a response: returns an unfiltered POST ``Request`` to
      ``self.login_page`` whose callback is this method.
    * Called with a response in which ``#ithelpProfile h3`` matches
      something: returns ``self.make_requests()``.
    * Otherwise: submits the form named ``login`` and calls back into this
      method again with the result.
    """
    print 'login'
    url = self.login_page
    if(response == None):
        # First call: fetch the login page and come back here with the result.
        return Request(url, method="POST", dont_filter=True, callback=self.login)
    name = html.fromstring(response.body).cssselect('#ithelpProfile h3')
    if(name):
        return self.make_requests()
    else:
        # NOTE(review): the next line looks damaged by credential redaction
        # (several statements fused together); presumably it prompted for
        # `user` and `password` and built `data` from them. Restore from
        # version control before running.
        user = raw_input("enter username: "******"enter password: "******"username":user, "password":password}
        return FormRequest.from_response(response, formname="login", formdata=data, callback=self.login, dont_filter=True)
def parse(self, response):
    """Submit the registration form to create an account.

    Stores the derived e-mail address on ``self.email``, fills the form
    matched by ``//*[@id="registerForm"]`` and redirects the submission to
    the AEntry.php endpoint. The response is handled by
    ``self.account_created``.
    """
    # NOTE(review): this literal looks redacted; it has no %s placeholder,
    # so formatting it with a plain string would raise TypeError. Confirm
    # the real address pattern.
    self.email = '*****@*****.**' % self.username

    registration_fields = {
        'action': 'register',
        'redirect': 'ajax',
        'source': 'Checkout',
        'SFproductID': '',
        'first_name': self.username,
        'last_name': self.username,
        'username': self.email,
        'password': self.password,
    }
    registration = FormRequest.from_response(
        response=response,
        formxpath='//*[@id="registerForm"]',
        formdata=registration_fields,
        callback=self.account_created,
    )
    # Override the target URL taken from the form.
    yield registration.replace(
        url="https://www.veritasprep.com/checkout/LIBRARY/auth/AEntry.php")
def parse_search_page(self, response):
    """Log the signed-in user's header details and submit the one-way search.

    The user name, number and mileage are read from the page header and
    logged at INFO level; a missing header element is logged as ``None``
    rather than raising. The ``onewayTravel`` form is then submitted with a
    fixed YYZ -> LHR business-class search for 2016-07-09 (one adult), and
    the response is handled by ``self.parse_results_do``.
    """
    self.log("English selected", logging.INFO)

    # Extract some info about the user.
    user_name = response.xpath(
        '//span[@class="header-name"]/text()').extract_first()
    user_number = response.xpath(
        '//span[@class="header-number"]/text()').extract_first()
    user_mileage = response.xpath(
        '//span[@class="header-mileage"]/text()').extract_first()

    # extract_first() returns None when nothing matches, so format with %s
    # instead of concatenating strings.
    self.log("name=%s" % user_name, logging.INFO)
    self.log("number=%s" % user_number, logging.INFO)
    self.log("mileage=%s" % user_mileage, logging.INFO)

    # Submit search form to /adr/SearchProcess.do; this will redirect
    # to /adr/Results.do, which is responsible for displaying the progress
    # bar.
    yield FormRequest.from_response(
        response,
        formxpath='//form[@name="onewayTravel"]',
        formdata={
            'currentTripTab': 'oneway',
            'modifySearch': 'false',
            'forceIkk': 'false',
            'city1FromOneway': 'YYZ',
            'city1FromOnewayCode': 'YYZ',
            'city1ToOneway': 'LHR',
            'city1ToOnewayCode': 'LHR',
            'l1Oneway': '2016-07-09',
            'l1OnewayDate': '2016-07-09',
            'OnewayFlexibleDatesHidden': '0',
            'OnewayAdultsNb': '1',
            'OnewayChildrenNb': '0',
            'OnewayTotalPassengerNb': '1',
            'OnewayCabin': 'Business',
        },
        callback=self.parse_results_do,
    )
def parse(self, response):
    """Submit the purchase form in ``response`` using cookie-stored settings.

    The ``ddrQuantity`` field is set to the last ``<option>`` value of the
    select with that name, or 1 when it cannot be read. Role, phone and QQ
    values are taken from the cookies of the originating request. The
    submitted form's response is handled by ``ZXY_QKYJ_Deal_Spider().parse``.

    Raises:
        KeyError: if one of the expected configuration cookies is missing.
    """
    hxs = HtmlXPathSelector(response)
    cookies = response.request.cookies

    try:
        count = hxs.select(u'//select[@name="ddrQuantity"]/option/@value').extract()[-1]
    except Exception:
        # Best effort: fall back to a quantity of 1.
        count = 1

    formdata = {
        u'ddrQuantity': unicode(count),  # purchase quantity
        u'txtReceivingRole': cookies[QKYJConst.qkyj_config_Role],  # receiving role
        u'txtSureReceivingRole': cookies[QKYJConst.qkyj_config_Role],  # confirmation
        u'txtPhone': cookies[QKYJConst.qkyj_config_tel],  # contact phone
        u'txtQq': cookies[QKYJConst.qkyj_config_QQ],  # QQ
    }

    try:
        # Pass the callback by keyword; positionally it would be taken as
        # from_response()'s ``formname`` argument and never be called.
        yield FormRequest.from_response(
            response,
            formdata=formdata,
            callback=ZXY_QKYJ_Deal_Spider().parse,
        )
    except Exception as e:
        self.log(u'%s' % str(e), log.INFO)
def login(self, response):
    """Submit the form in ``response`` with the fixed credentials.

    The login response is handled by ``self.after_login``.
    """
    credentials = {
        'username': '******',
        'password': '******',
    }
    return FormRequest.from_response(
        response,
        formdata=credentials,
        callback=self.after_login,
    )
def login(self, response):
    """Submit the login form located via ``formxpath``.

    The username and password fields are empty placeholders to be filled
    in; the response is handled by ``self.check_login_response``.
    """
    credentials = {
        'ips_username': '',  # your username
        'ips_password': '',  # your password
    }
    form_location = '//ul[@class="ipsForm ipsForm_vertical ipsPad_double left"]'
    return FormRequest.from_response(
        response,
        formxpath=form_location,
        formdata=credentials,
        callback=self.check_login_response,
    )
def login(self, response):
    """Generate a login request.

    The form in ``response`` is submitted with a fixed name and password;
    the response is handled by ``self.check_login_response``.
    """
    credentials = {'name': 'herman', 'password': '******'}
    return FormRequest.from_response(
        response,
        formdata=credentials,
        callback=self.check_login_response,
    )
def parse(self, response):
    """Submit the form named ``principal`` to start the search.

    Field values come from ``self.__getFormData()``. Successful responses go
    to ``self.on_search``; failures go to ``self.handle_form_error``.
    """
    search_fields = self.__getFormData()
    search_request = FormRequest.from_response(
        response,
        formname="principal",
        formdata=search_fields,
        callback=self.on_search,
        errback=self.handle_form_error,
    )
    yield search_request
def parse(self, response):
    """Select the term by submitting the form at index 1 of ``response``.

    ``termChoice`` is set to the module-level ``curTerm``; the response is
    handled by ``self.parse_result_page``.
    """
    term_selection = {'termChoice': curTerm}
    yield FormRequest.from_response(
        response,
        formnumber=1,
        formdata=term_selection,
        callback=self.parse_result_page,
    )
def parse(self, response):
    """POST the form in ``response`` with zip code 10004 selected.

    Returns a one-element list holding the ``FormRequest``; its response is
    handled by ``self.after_login``.
    """
    zip_request = FormRequest.from_response(
        response,
        formdata={'zipCodeSelector': '10004'},
        method='POST',
        callback=self.after_login,
    )
    return [zip_request]
def parse_start_url(self, response):
    """Submit the form at index 1 when the response URL contains 'init'.

    Yields nothing for any other URL.
    """
    if 'init' not in response.url:
        return
    yield FormRequest.from_response(response, formnumber=1)
def login(self, response):
    """Generate a login request.

    The form in ``response`` is submitted with ``self.username`` and
    ``self.password``; the response is handled by
    ``self.check_login_response``.
    """
    credentials = {
        'username': self.username,
        'password': self.password,
    }
    return FormRequest.from_response(
        response,
        formdata=credentials,
        callback=self.check_login_response,
    )