def fetch(url):
    """Download *url* and return its rendered HTML.

    A fresh ``HTMLSession`` with a random User-Agent is created per call and
    the call sleeps for a random 1-6 seconds before requesting. Redirects are
    not followed, so only a direct ``200`` response is rendered.

    :param url: Address of the page to fetch.
    :returns: The ``r.html`` object after ``render()`` on success; an empty
        string when the request fails, the status is not 200, or rendering
        fails.
    """
    data = ""
    asession = HTMLSession()
    # try/finally closes the session on every path, including an exception
    # raised while building the User-Agent or during the sleep.
    try:
        asession.headers.update({'User-Agent': fake_useragent.UserAgent().random})
        asession.max_redirects = 60

        # Random delay between requests.
        time.sleep(uniform(1, 6))

        try:
            r = asession.request('GET', url, allow_redirects=False)
        except Exception as e:
            print('Failed to get page %s. Reason: %s' % (url, e))
            return data

        if r.status_code != 200:
            return data

        try:
            r.html.render(sleep=2, timeout=200)
        except Exception as e:
            print('Failed to render page %s. Reason: %s' % (url, e))
            return data
        return r.html
    finally:
        asession.close()
class DojoRequests:
    """Thin wrapper over an ``HTMLSession`` for calling the dojo/gdp API.

    Browser cookies are loaded once at construction time and attached to
    every request so calls are authenticated. All keyword arguments are
    forwarded to the session's ``request``; see the Requests library
    documentation for what they mean.
    """

    def __init__(self):
        """Create the session and load cookies from the configured browser."""
        self.session = HTMLSession()
        wants_firefox = config.get('browser') == 'firefox'
        # Anything other than 'firefox' falls back to Chrome cookies.
        self.cookies = browser_cookie3.firefox() if wants_firefox else browser_cookie3.chrome()

    def request(self, method, url, **kwargs):
        """Send a request with the stored cookies.

        ``method`` is upper-cased; entries in ``kwargs`` (for example
        ``cookies``) override the defaults built here.
        """
        request_args = {
            'url': url,
            'method': method.upper(),
            'cookies': self.cookies,
            **kwargs,
        }
        return self.session.request(**request_args)

    def get(self, url, **kwargs):
        """Shortcut for a ``GET`` request."""
        return self.request('GET', url, **kwargs)

    def post(self, url, **kwargs):
        """Shortcut for a ``POST`` request."""
        return self.request('POST', url, **kwargs)
def test_request_to_bs4__strainer_missing(mock_soup):
    """With ``None`` as the first argument (presumably the strainer), the
    response text must be handed to the soup constructor with only the
    ``"lxml"`` parser argument.
    """
    http_session = HTMLSession()
    with patch.object(http_session, 'request') as fake_request:
        # The patched request returns a mock whose .text is the HTML below.
        fake_request.return_value.text = "some_HTML_code"
        response = http_session.request("method", "http://url")
        web_control.request_to_bs4(None, response)
        mock_soup.assert_called_once_with("some_HTML_code", "lxml")
def session(populated_cluster):
    """Return an ``HTMLSession`` whose ``request`` prefixes every URL.

    The prefix is ``populated_cluster["url"]`` with trailing slashes removed,
    so callers pass paths such as ``"/api/..."`` instead of full URLs.
    """
    base_url = populated_cluster["url"].rstrip("/")
    http = HTMLSession()

    def _prefixed(prefix, send, method, url, *args, **kwargs):
        # ``send`` is the session's original bound ``request`` method.
        target = prefix + url
        return send(method, target, *args, **kwargs)

    http.request = partial(_prefixed, base_url, http.request)
    return http
class AsyncRequest(object):
    """ Asynchronous request.

    Accept same parameters as ``Session.request`` and some additional:

    :param session: Session which will do request
    :param callback: Callback called on response.
                     Same as passing ``hooks={'response': callback}``
    """

    def __init__(self, method, url, **kwargs):
        #: Request method
        self.method = method
        #: URL to request
        self.url = url

        supplied_session = kwargs.pop('session', None)
        # Only a session created here is closed after sending; a session
        # supplied by the caller is left open for reuse.
        self._close = supplied_session is None
        #: Associated ``Session``
        self.session = Session() if supplied_session is None else supplied_session

        callback = kwargs.pop('callback', None)
        if callback:
            kwargs['hooks'] = {'response': callback}

        #: The rest arguments for ``Session.request``
        self.kwargs = kwargs
        #: Resulting ``Response``
        self.response = None

    def send(self, **kwargs):
        """
        Prepares request based on parameter passed to constructor and
        optional ``kwargs``. Then sends request and saves response to
        :attr:`response`.

        On failure the exception is stored in ``self.exception`` and the
        formatted traceback in ``self.traceback`` instead of being raised;
        neither attribute exists after a successful send.

        :returns: this ``AsyncRequest`` instance
        """
        # Call-time kwargs take precedence over the ones given at construction.
        call_kwargs = {**self.kwargs, **kwargs}
        try:
            self.response = self.session.request(self.method, self.url, **call_kwargs)
        except Exception as exc:
            self.exception = exc
            self.traceback = traceback.format_exc()
        finally:
            if self._close:
                # The session was created by this object and will not be
                # reused, so release it now.
                self.session.close()
        return self
def verfiy_localbitcoins(username, lbc_username):
    """Check whether a LocalBitcoins profile shows the user's verification code.

    :param username: Local user whose verification code is looked up via
        ``get_verification_code``.
    :param lbc_username: LocalBitcoins account name whose profile is fetched.
    :returns: ``True`` when the code appears in the profile's
        ``.overflow-catch`` element; ``False`` when the profile check fails,
        the element is missing, or the code is absent.
    """
    session = HTMLSession()
    # Fake user agent to reduce chance of getting seen as a bot
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0', }
    try:
        response = session.request(url=f'https://localbitcoins.com/accounts/profile/{lbc_username}/',
                                   method='GET', headers=headers)

        # If the profile is invalid it will redirect to home.
        # NOTE(review): html.next() looks up the page's "next" link rather
        # than the final response URL -- confirm this detects the redirect.
        if response.html.next() == 'https://localbitcoins.com/':
            return False

        code_area = response.html.find('.overflow-catch', first=True)
        if code_area is None:
            # No profile text block: treat as "not verified" instead of
            # failing with AttributeError.
            return False
        return get_verification_code(username) in code_area.text
    finally:
        session.close()
class MM(object):
    """Scraper that saves listing images from the mm131 "qingchun" pages."""

    def __init__(self):
        """Set up the session, request headers and the output directory."""
        self.__page = 1
        self.__url = "http://www.mm131.com/qingchun/list_1_{}.html"
        self.__session = HTMLSession()
        self.__headers = {
            'Referer': 'http://www.mm131.com/qingchun/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
        }
        self.__imagePath = r'D:/Photo/MM'
        self.__confirmPath()

    def __confirmPath(self):
        """Create the image directory if it does not exist yet."""
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.__imagePath, exist_ok=True)

    def download(self, link, fileName):
        """Fetch *link* and store it as ``<image dir>/<fileName>.jpg``.

        Any error is printed, not raised, so one bad image does not stop the
        crawl.
        """
        try:
            # Fetch before opening the file so a failed request does not
            # leave an empty .jpg behind.
            content = self.__session.request(
                'get', link, headers=self.__headers,
                allow_redirects=False).content
            with open(os.path.join(self.__imagePath, fileName + '.jpg'), 'wb') as f:
                f.write(content)
        except Exception as e:
            print(str(e))

    def parseData(self):
        """Walk listing pages from the current page up to page 11 and
        download every image except the last ``dd`` entry of each page, then
        print the elapsed time.
        """
        start = time.time()
        while self.__page < 12:
            # Page 1 lives at the section root; later pages use list_1_<n>.html.
            if self.__page == 1:
                self.__url = "http://www.mm131.com/qingchun/"
            else:
                self.__url = 'http://www.mm131.com/qingchun/list_1_{}.html'.format(
                    self.__page)
            r = self.__session.get(self.__url)
            main = r.html.find(".main", first=True)
            dl = main.find('dl')[0]
            dds = dl.find('dd')
            # The last <dd> is skipped, as in the original crawl.
            for dd in dds[:-1]:
                attr = dd.find('img')[0].attrs
                imageLink = attr['src']
                title = attr['alt']
                self.download(imageLink, title)
            self.__page += 1
        end = time.time() - start
        print("爬取时间:", end)
def parse_course_list(request):
    """Rebuild the ``Course`` table from the MIT OCW course index.

    Scrapes ``https://ocw.mit.edu/courses/`` for ``.course_title`` elements,
    deletes every existing ``Course`` row, resets the table's sequence and
    inserts one ``Course`` per scraped link.

    :param request: Incoming request; the user must be authenticated.
    :returns: ``HttpResponse`` with a short status message.
    """
    # NOTE(review): on newer Django versions ``is_authenticated`` is an
    # attribute, not a method -- confirm the targeted version.
    if not request.user.is_authenticated():
        return HttpResponse("Not authenticated.")

    session = HTMLSession()
    try:
        course_list_r = session.request(url='https://ocw.mit.edu/courses/', method='get')
        courses = course_list_r.html.find('.course_title')
    finally:
        session.close()

    # Resolve the links before touching the database, so a failed or empty
    # scrape cannot wipe the existing courses.
    course_urls = []
    for course in courses:
        links = course.absolute_links
        if links:
            course_urls.append(links.pop())
    if not course_urls:
        return HttpResponse("No courses found; existing courses were kept.")

    # Clear out old courses
    Course.objects.all().delete()
    sequence_sql = connection.ops.sequence_reset_sql(no_style(), [Course])
    with connection.cursor() as cursor:
        for sql in sequence_sql:
            cursor.execute(sql)

    for course_url in course_urls:
        Course(url=course_url).save()

    return HttpResponse("Successfully added all ")
class MM(object):
    """Image crawler for the mm131 "qingchun" listing pages."""

    def __init__(self):
        """Prepare session, headers and make sure the target folder exists."""
        self.__page = 1
        self.__url = "http://www.mm131.com/qingchun/list_1_{}.html"
        self.__session = HTMLSession()
        self.__headers = {
            'Referer': 'http://www.mm131.com/qingchun/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
        }
        self.__imagePath = r'D:/Photo/MM'
        self.__confirmPath()

    def __confirmPath(self):
        """Ensure the image folder exists (no error if it already does)."""
        os.makedirs(self.__imagePath, exist_ok=True)

    def download(self, link, fileName):
        """Download *link* into ``<image folder>/<fileName>.jpg``.

        Failures are printed and swallowed so the crawl keeps going.
        """
        target = os.path.join(self.__imagePath, fileName + '.jpg')
        try:
            response = self.__session.request(
                'get', link, headers=self.__headers, allow_redirects=False)
            payload = response.content
            # The file is opened only after the body was fetched, so a
            # failed request does not create an empty image file.
            with open(target, 'wb') as f:
                f.write(payload)
        except Exception as e:
            print(str(e))

    def parseData(self):
        """Crawl listing pages from the current page through page 11,
        downloading each image but the last ``dd`` of every page, and print
        the total time taken.
        """
        start = time.time()
        while self.__page < 12:
            # The first page has no list_1_<n>.html suffix.
            if self.__page == 1:
                self.__url = "http://www.mm131.com/qingchun/"
            else:
                self.__url = 'http://www.mm131.com/qingchun/list_1_{}.html'.format(self.__page)
            r = self.__session.get(self.__url)
            main = r.html.find(".main", first=True)
            dl = main.find('dl')[0]
            dds = dl.find('dd')
            for dd in dds[:-1]:
                attr = dd.find('img')[0].attrs
                self.download(attr['src'], attr['alt'])
            self.__page += 1
        end = time.time() - start
        print("爬取时间:", end)
def toRequestHtml(url):
    """Fetch *url*, print its HTML, then follow and print each listed link.

    For every ``div.list-item-desc-top`` element on the page, the first
    ``<a>`` is followed (its ``href`` is prefixed with ``https:``) and the
    text of each ``div.d-left`` on the linked page is printed.

    :param url: Page to request; sent with the module-level ``headerTwo``
        headers.
    """
    print(url)
    session = HTMLSession()
    r = session.request("GET", url, headers=headerTwo)
    print(r.html.html)

    about = r.html.find('div.list-item-desc-top')
    for i, title in enumerate(about):
        # Search the element itself: ``title.html`` is a plain string, so
        # calling ``.find('a')`` on it returned an int, not an element.
        aElement = title.find('a', first=True)
        if aElement is None or 'href' not in aElement.attrs:
            continue
        nextUrl = f'https:{aElement.attrs["href"]}'
        nextHtml = session.get(nextUrl)
        print(f'{i + 1} [{aElement.text}](https:{aElement.attrs["href"]})')
        nextAbout = nextHtml.html.find('div.d-left')
        for j, nextTitle in enumerate(nextAbout):
            print(f'{j + 1} [{nextTitle.text}]')
class SkywardAPI():
    """Class for Skyward data retrieval.

    Parameters
    ----------
    service: str
        Skyward service for school.
    timeout: int
        Request timeout (the default is 60)

    Attributes
    ----------
    timeout : int
        Seconds until request times out.
    base_url: str
        Base url for requests
    login_url: str
        URL for login.
    session_params : Dict[str, Any]
        Parameters for session.

    """
    def __init__(self, service: str, timeout: int = 60) -> None:
        self.base_url = "https://skyward.iscorp.com/scripts/wsisa.dll/WService={0}".format(
            service)
        self.login_url = self.base_url + "/skyporthttp.w"
        self.timeout = timeout
        self.session_params = {}  # type: Dict[str, str]
        self.session = HTMLSession()

    def edit_srcs(self, page: HTMLResponse) -> HTML:
        """Edits urls in page to request from Skyward and not local computer.

        Parameters
        ----------
        page : HTMLResponse
            HTMLResponse from a request to skyward.

        Returns
        -------
        HTML
            HTML object with urls pointing to Skyward website.

        Side Effects
        ------------
        Attaches the HTML object to self.session. If rendering, make sure to
        close session so chromiums do not pile up.

        """
        # Prefix every single-quoted src/href with the Skyward base url so
        # that all requests are made to the Skyward site and not the local
        # computer.
        new_text = page.text
        new_text = new_text.replace("src='",
                                    "src='{0}/".format(self.base_url)).replace(
                                        "href='",
                                        "href='{0}/".format(self.base_url))
        return HTML(html=new_text, session=self.session)

    def timed_request(self,
                      url: str,
                      data: Dict[str, str] = {},
                      headers: Dict[str, str] = {},
                      method: str = "post",
                      params: Dict[str, str] = {}) -> HTMLResponse:
        """Issues a requests-html request with timeout functionality.
        Automatically closes session at end of request.

        Parameters
        ----------
        url : str
            URL for request.
        data : Dict[str, str]
            Data for request (the default is {}).
        headers : Dict[str, str]
            Headers for request (the default is {}).
        method : str
            Method of request (the default is "post").
        params : Dict[str, str]
            Params for request (the default is {}).

        Returns
        -------
        HTMLResponse
            Response of request.

        Raises
        -------
        SkywardError
            Unable to connect to skyward.

        Side Effects
        ------------
        Closes self.session and regenerates it after every attempt.

        """
        # The dict defaults are only passed through, never modified here.
        start_time = time.time()
        return_data = None
        while True:
            try:
                return_data = self.session.request(method,
                                                   url,
                                                   data=data,
                                                   headers=headers,
                                                   params=params)
                break
            except requests.exceptions.ConnectionError as err:
                # Retry once per second until self.timeout seconds elapsed.
                if time.time() > start_time + self.timeout:
                    raise SkywardError('Request to Skyward failed.') from err
                time.sleep(1)
            finally:
                self.session.close()
                self.session = HTMLSession()
        return return_data

    def login(self, username: str, password: str) -> Dict[str, Any]:
        """Logs into Skyward and retrieves session data.

        Parameters
        ----------
        username: str
            Skyward username.
        password: str
            Skyward password.

        Returns
        -------
        Dict[str, Any]
            Login data for skyward.

        Raises
        -------
        ValueError
            Incorrect username or password.
        SkywardError
            Unable to connect to Skyward.

        """
        # Work on a copy so the credentials are not written into the shared
        # module-level request template.
        params = dict(skyward_req_conf)
        params["codeValue"] = username
        params["login"] = username
        params["password"] = password

        # Sometimes a request does not go through on the first try, so an
        # empty response is retried: one initial attempt plus six retries.
        max_attempts = 7
        text = ""
        for _ in range(max_attempts):
            req = self.timed_request(self.login_url, data=params)
            text = req.html.text
            if text != "":
                break
        # Checked after the loop so a retried response is validated too.
        if "Invalid" in text:
            raise ValueError("Incorrect username or password")
        if text == "":
            raise SkywardError("Skyward returning no login data.")
        data = parse_login_text(self.base_url, text)
        return data

    def setup(self, username: str, password: str) -> None:
        """Sets up api session data using username and password.

        Parameters
        ----------
        username : str
            Skyward username.
        password : str
            Skyward password.

        """
        data = self.login(username, password)
        self.login_data = data
        self.session_params = self.get_session_params()

    @staticmethod
    def from_username_password(username: str,
                               password: str,
                               service: str,
                               timeout: int = 60) -> "SkywardAPI":
        """Returns a logged-in SkywardAPI object using username and password provided.

        Parameters
        ----------
        username : str
            Skyward username.
        password : str
            Skyward password.
        service : str
            Skyward service.
        timeout : int
            Timeout of requests made to Skyward (the default is 60).

        Returns
        -------
        SkywardAPI
            API object logged in with supplied credentials.

        Raises
        -------
        ValueError
            Incorrect username and password (from setup).
        SkywardError
            Unable to connect to Skyward (from setup).

        """
        api = SkywardAPI(service, timeout=timeout)
        api.setup(username, password)
        return api

    @staticmethod
    def from_session_data(service: str,
                          sky_data: Dict[str, str],
                          timeout: int = 60) -> "SkywardAPI":
        """Generates an API given a service and session data.

        Parameters
        ----------
        service : str
            Skyward service to be used.
        sky_data : Dict[str, str]
            Session data from skyward. Updated in place with the values read
            from the rendered page.
        timeout : int
            Timeout of requests made to Skyward (the default is 60).

        Returns
        -------
        SkywardAPI
            An api for the user, given the session info.

        Raises
        -------
        SessionError
            If session credentials are revoked by Skyward, error is raised.

        Side Effects
        ------------
        Closes and regenerates self.session.

        """
        api = SkywardAPI(service, timeout=timeout)
        api.session_params = sky_data
        grade_url = api.base_url + "/sfhome01.w"
        sessionp = api.session_params
        req3 = api.timed_request(grade_url,
                                 data={
                                     "encses": sessionp["encses"],
                                     "sessionid": sessionp["sessid"]
                                 })
        new_html = api.edit_srcs(req3)
        try:
            other_data = new_html.render(script="""
                () => {
                    return {
                        dwd: sff.getValue('dwd'),
                        nameid: sff.getValue('nameid'),
                        wfaacl: sff.getValue('wfaacl'),
                    }
                }
            """,
                                         retries=2,
                                         timeout=2.5,
                                         keep_page=False)
        except MaxRetries:
            raise SessionError("Session destroyed by Skyward.")
        finally:
            # Release the rendering session on failure as well as success.
            api.session.close()
            api.session = HTMLSession()
        api.session_params.update(other_data)
        return api

    def get_session_params(self) -> Dict[str, str]:
        """Gets session data from Skyward for login.

        Returns
        -------
        Dict[str, str]
            Session variables.

        Raises
        -------
        SkywardError
            The session fields were missing from every attempt.

        """
        ldata = self.login_data
        obj = {}  # type: Dict[str, str]
        # Sometimes this doesn't work on the first try. Retry a bounded
        # number of times instead of recursing without limit.
        max_attempts = 7
        for _ in range(max_attempts):
            req = self.timed_request(ldata["new_url"], data=ldata["params"])
            page = req.html
            try:
                obj["sessid"] = page.find("#sessionid", first=True).attrs["value"]
                obj["encses"] = page.find("#encses", first=True).attrs["value"]
            except AttributeError:
                # find(..., first=True) returned None for a missing field.
                continue
            break
        else:
            raise SkywardError("Skyward returning no session data.")
        obj["dwd"] = ldata["params"]["dwd"]
        obj["nameid"] = ldata["params"]["nameid"]
        obj["wfaacl"] = ldata["params"]["wfaacl"]
        return obj

    def get_class_grades(self, sm_grade: Element, grid_count: int,
                         constant_options: Dict[str, str], url: str,
                         sm_num: int) -> SkywardClass:
        """Gets class grades given elements and request options.

        Parameters
        ----------
        sm_grade : Element
            HTML element containing request information.
        grid_count : int
            Grid count parameter on page (currently unused).
        constant_options : Dict[str, str]
            Constant options provided to ensure valid request.
        url : str
            Request url.
        sm_num : int
            Semester number in question.

        Returns
        -------
        SkywardClass
            Grades from a class.

        """
        attrs = sm_grade.attrs
        specific_request_data = {
            "corNumId": attrs["data-cni"],
            "gbId": attrs["data-gid"],
            "stuId": attrs["data-sid"],
            "section": attrs["data-sec"],
            "entityId": attrs["data-eid"]
        }
        # Merge into a new dict so the caller's options are left untouched.
        grade_request_data = {**constant_options, **specific_request_data}
        grade_req = self.timed_request(url,
                                       data=grade_request_data,
                                       params={"file": "sfgradebook001.w"})
        # The grade markup is taken from between "<![CDATA[" and "]]".
        text = grade_req.text
        start_split = text.find("<![CDATA[") + len("<![CDATA[")
        end_split = text.find("]]")
        text_split = text[start_split:end_split + 1]
        doc = HTML(html=text_split)

        class_name = doc.find(".gb_heading", first=True).text
        class_name = class_name.replace("\xa0", " ")
        sky_class = SkywardClass(class_name, [])

        # Date range looks like "(START - END)" so removing ( ) and splitting
        # gives the start date.
        semester_info = doc.find("th", first=True)
        date_range = semester_info.find("span", first=True).text
        date_range = date_range.replace("(", "").replace(")", "")
        sem_start_date = date_range.split(" - ")[0]

        # First ".odd" row: letter grade on line one, percent on line two.
        sem_grade = doc.find(".odd", first=True)
        sem_grade_spl = sem_grade.text.split("\n")
        sem_lg = sem_grade_spl[0]
        sem_percent = sem_grade_spl[1]
        sem_asign = Assignment("SEM{0}".format(sm_num), sem_percent, "100",
                               sem_lg, sem_start_date)
        sky_class.add_grade(sem_asign)

        style_str = "padding-right:4px"
        scope = [
            row for row in doc.find("td")
            if "style" in row.attrs and row.attrs["style"] == style_str
        ]
        scope_major = scope[0]
        scope_grades = scope[1]
        list_of_grades = scope_grades.find(".even") + scope_grades.find(".odd")
        list_of_major_grades = scope_major.find(".even") + scope_major.find(
            ".odd")
        assignments = [
            assignment for assignment in list_of_grades
            if "zebra-same" not in assignment.attrs
        ]
        major_grades = [
            grade for grade in list_of_major_grades
            if "zebra-same" in grade.attrs
            and grade.attrs["zebra-same"] == "true"
        ]

        for assignment in assignments:
            assignment_info = assignment.find("td")
            try:
                date = assignment_info[0].text
                name = assignment_info[1].text
            except IndexError:
                # Row without date/name cells is skipped.
                continue
            try:
                lg = assignment_info[2].text
                point_str = assignment_info[4].text
                point_str_spl = point_str.split(" out of ")
                earned = point_str_spl[0]
                out_of = point_str_spl[1]
                assign = Assignment(name, earned, out_of, lg, date)
            except IndexError:
                # Missing score cells are recorded with "*" placeholders.
                assign = Assignment(name, "*", "*", "*", date)
            sky_class.add_grade(assign)

        for grade in major_grades:
            grade_info = grade.find("td")
            try:
                desc = grade_info[0].text
                desc = desc.replace("\n", "")
                colon_split = desc.split(":")
                name = colon_split[0]
                lg = colon_split[1][0]
            except IndexError:
                continue
            try:
                grade_data = grade_info[2].text
                str_split = grade_data.split(" out of ")
                earned = str_split[0]
                out_of = str_split[1]
                sky_class.add_grade(
                    Assignment(name, earned, out_of, lg, sem_start_date))
            except IndexError:
                sky_class.add_grade(
                    Assignment(name, "*", "*", "*", sem_start_date))

        sky_class.sort_grades_by_date()
        return sky_class

    def get_semester_grades(self, semester_num: int,
                            page: HTML) -> List[SkywardClass]:
        """Gets grades for a specific semester.

        Parameters
        ----------
        semester_num : int
            1 or 2 for first or second semester.
        page : HTML
            HTML Grade page to get buttons/links/etc.

        Returns
        -------
        List[SkywardClass]
            List of class grades.

        """
        grades = []  # type: List[SkywardClass]
        sessionp = self.session_params
        grade_buttons = page.find("#showGradeInfo")
        sm_grade_buttons = [
            button for button in grade_buttons
            if button.attrs["data-lit"] == "SM{0}".format(semester_num)
        ]
        grade_req_url = "{0}/httploader.p".format(self.base_url)
        constant_options = {
            "encses": sessionp["encses"],
            "sessionid": sessionp["sessid"],
            "ishttp": "true",
            "fromHttp": "yes",
            "action": "viewGradeInfoDialog",
            "bucket": "SEM {0}".format(semester_num)
        }
        grid_count = 1
        for class_sm_grade in sm_grade_buttons:
            grades.append(
                self.get_class_grades(class_sm_grade, grid_count,
                                      constant_options, grade_req_url,
                                      semester_num))
        return grades

    def get_grades(self) -> List[SkywardClass]:
        """Gets grades from both semesters.

        Returns
        -------
        List[SkywardClass]
            Grades from both semesters.

        Raises
        ------
        SessionError
            If the session is destroyed, no data can be received.

        """
        grade_url = self.base_url + "/sfgradebook001.w"
        sessionp = self.session_params
        req1 = self.timed_request(grade_url,
                                  data={
                                      "encses": sessionp["encses"],
                                      "sessionid": sessionp["sessid"]
                                  })
        new_html = self.edit_srcs(req1)
        if "Your session has timed out" in new_html.text or "session has expired" in new_html.text:
            raise SessionError("Session destroyed. Session timed out.")
        try:
            new_html.render()
            grades = self.get_semester_grades(1, new_html)
            grades += self.get_semester_grades(2, new_html)
            # ``grades`` is a list, so test for emptiness rather than
            # comparing against ``{}`` (which never matched).
            if not grades:
                raise SessionError("Session destroyed. No grades returned.")
        finally:
            # Always release the rendering session, even on error.
            self.session.close()
            self.session = HTMLSession()
        return grades

    def get_grades_text(self) -> Dict[str, List[str]]:
        """Converts Assignments in get_grades() to strings

        Returns
        -------
        Dict[str, List[str]]
            Grades (as a string) from both semesters, keyed by
            ``skyward_title()``.

        """
        return {
            sky_class.skyward_title(): sky_class.grades_to_text()
            for sky_class in self.get_grades()
        }

    def get_grades_json(self) -> Dict[str, List[Dict[str, Any]]]:
        """Converts Assignments in get_grades() to plain dictionaries

        Returns
        -------
        Dict[str, List[Dict[str, Any]]]
            Each grade's ``__dict__`` from both semesters, keyed by
            ``skyward_title()``.

        """
        return {
            sky_class.skyward_title():
            [grade_obj.__dict__ for grade_obj in sky_class.grades]
            for sky_class in self.get_grades()
        }

    def keep_alive(self) -> None:
        """Issues a keep-alive request for the session.
        """
        grade_url = self.base_url + "/qsuprhttp000.w?"
        sessionp = self.session_params
        self.timed_request(grade_url,
                           data={
                               "dwd": sessionp["dwd"],
                               "idleTimeout": 300000,
                               "myIdleSeconds": 60,
                               "nameid": sessionp["nameid"],
                               "requestAction": "mySession",
                               "wfaacl": sessionp["wfaacl"]
                           },
                           method="get")
from requests_html import HTMLSession
import pymongo
import json
from util import *

if __name__ == "__main__":
    # Download the daily theater box-office ranking for every date yielded by
    # get_date_str and upsert it into film.area_detail_data, keyed by 'time'.
    session = HTMLSession()
    client = pymongo.MongoClient('mongodb://localhost:27017/')
    try:
        database = client["film"]
        connection = database["area_detail_data"]  # a collection, despite the name
        for i in get_date_str("2011-01-01", "2019-10-18"):
            url = 'http://www.films.cn/api/top/theater/boxoffice/local?date=%s&size=30' % i
            r = session.request("GET", url).content
            raw_data = json.loads(r)
            data_theater = raw_data['data']
            print(data_theater)
            # upsert=True creates the document for a date seen the first time.
            connection.update_one({'time': i},
                                  {'$set': {'data_theater': data_theater}},
                                  upsert=True)
            print("=======")
    finally:
        # Release the HTTP session and the MongoDB connection even when a
        # request or JSON decode fails part-way through the date range.
        session.close()
        client.close()
def _scrape_course_section(session, url):
    """Return the ``#course_inner_section`` HTML at *url*, or ``"N/A"``.

    Everything from the ``<!--googleoff: index-->`` comment onwards (the help
    modals) is dropped; if the comment is absent the full section is kept.
    """
    section_r = session.request(url=url, method='get')
    if not section_r:
        return "N/A"
    section = section_r.html.find('#course_inner_section', first=True)
    if section is None:
        return "N/A"
    # partition() keeps the whole text when the marker is missing, where
    # str.index() would raise ValueError.
    return str(section.html).partition('<!--googleoff: index-->')[0]


def parse_course(request, course_id):
    """Scrape one course page and store the parsed fields on the ``Course``.

    :param request: Incoming request (not inspected here).
    :param course_id: Primary key of the ``Course`` to refresh.
    :returns: ``HttpResponse`` with a short status message.
    """
    try:
        course = Course.objects.get(pk=course_id)
    except Course.DoesNotExist:
        return HttpResponse("Bad Course Id")

    session = HTMLSession()
    info_r = session.request(url=course.url, method='get')

    # Deactivate bad courses
    if not info_r:
        course.active = False
        # Persist the flag; without save() the deactivation was lost.
        course.save()
        return HttpResponse("URL not valid.")

    # Make sure course is active
    course.active = True

    # Set course name
    course.name = info_r.html.find('#course_title', first=True).find()[0].text

    # Set course info
    course_info = info_r.html.find('#course_info', first=True)
    authors = course_info.find('[itemprop=author]', first=False)
    course.instructors = ", ".join(author.text for author in authors)

    # The last <p> without an itemprop attribute carries the course number.
    for paragraph in course_info.find('p'):
        if 'itemprop' not in paragraph.attrs:
            course.course_num = paragraph.text

    course.asTaughtIn = course_info.find('[itemprop=startDate]', first=True).text
    course.level = course_info.find('[itemprop=typicalAgeRange]', first=True).text
    course.description = info_r.html.find('#description', first=True).find('p', first=True).text

    # Set course syllabus, readings and tools
    course.syllabus = _scrape_course_section(session, course.url + '/syllabus/')
    course.readings = _scrape_course_section(session, course.url + '/readings/')
    course.tools = _scrape_course_section(session, course.url + '/tools/')

    # Save course
    course.save()

    return HttpResponse("Successfully parsed course " + str(course_id) + ".")
from requests_html import HTMLSession import pymongo import json from util import * if __name__ == "__main__": session = HTMLSession() client = pymongo.MongoClient('mongodb://localhost:27017/') database = client["film"] connection = database["film_detail_data"] for i in get_date_str("2011-01-01", "2019-10-18"): url1 = 'http://www.films.cn/api/top/movie/boxoffice?boxofficeTime=%s&movieType=all&topType=day&size=100' % i r1 = session.request("GET", url1).content raw_data1 = json.loads(r1) data_movie = [] # print(raw_data1) for data in raw_data1['data']: data_movie.append({ 'rank': data['rank'], 'id': data['id'], 'movieName': data['movieName'], 'boxOfficeIndexNum': data['boxOfficeIndexNum'], 'popShowIndexNum': data['popShowIndexNum'], 'showDays': int(data['showDays']), 'boxOffice':
class Aqualink(object):
    """Client that scrapes the Aqualink web pages and keeps a device map.

    Devices are stored in ``self._devices`` keyed by entity id and are
    updated by :meth:`refresh`, which is rate-limited and serialized with
    ``self.lock``.
    """

    def __init__(self, username: str, password: str):
        """Store the credentials, create the HTTP session and log in."""
        self.username = username
        self.password = password
        self.session = HTMLSession()
        self.login_link = None
        self.home_link = None
        self.home_cache = None
        self.devices_link = None
        self.devices_cache = None
        self._devices = {}
        self.lock = threading.Lock()
        self.last_refresh = 0
        self.login()

    def request(self, url: str, method: str = 'get', **kwargs) -> Response:
        """Send a request through the session and log the status line.

        A 200 status is logged at debug level, anything else as a warning.
        """
        r = self.session.request(method, url, **kwargs)
        if r.status_code == 200:
            logger.debug(f"<- {r.status_code} {r.reason} - {url}")
        else:
            logger.warning(f"<- {r.status_code} {r.reason} - {url}")
        return r

    def login(self) -> None:
        """Find the login link, authenticate if needed and cache the home page.

        Raises ``Exception`` when the home page still shows no ``div.temps``
        after posting the credentials.
        """
        logger.debug("Getting Aqualink start page...")
        start = self.request(ACTION_BASE_URL)
        form = start.html.find('form', first=True)
        action = form.xpath('//input[@id = "actionID"]', first=True).attrs['value']
        self.login_link = ACTION_BASE_URL % action
        logger.debug("Login Link: %s" % self.login_link)

        # Make sure our credentials work. A page without "div.temps" means
        # we are not logged in yet.
        self.home_cache = self.request(self.login_link)
        if len(self.home_cache.html.find("div.temps")) == 0:
            payload = {'userID': self.username, 'userPassword': self.password}
            logger.info("Logging in to Aqualink...")
            self.home_cache = self.request(self.login_link, 'post', data=payload)

        if len(self.home_cache.html.find("div.temps")) == 0:
            self.home_link = None
            self.home_cache = None
            raise Exception("Check your username and password.")
        else:
            self.home_link = self.home_cache.html.find('li#tabHeader_1', first=True).absolute_links.pop()
            logger.debug("Home Link: %s" % self.home_link)

    def refresh(self, force_refresh=False) -> None:
        """Refresh the device map unless it was refreshed very recently.

        Errors raised by the refresh are logged, not propagated; in that case
        ``last_refresh`` is left unchanged.
        """
        # ``with`` releases the lock on every exit path, including
        # exceptions that ``except Exception`` does not catch.
        with self.lock:
            # Be nice to Aqualink servers since we rely on polling.
            now = int(time.time())
            delta = now - self.last_refresh
            if delta < MIN_SECS_TO_REFRESH and not force_refresh:
                return

            try:
                self._refresh()
            except Exception as e:
                logger.error(f"Unhandled exception: {e}")
                for line in traceback.format_exc().split('\n'):
                    logger.error(line)
            else:
                self.last_refresh = int(time.time())

    def _refresh(self, force_refresh=False) -> None:
        """Re-scrape the home and devices pages and update ``self._devices``.

        ``force_refresh`` is accepted but not used here.
        """
        logger.debug("Refreshing device list...")

        if self.home_link is None:
            self.login()
        else:
            self.home_cache = self.request(self.home_link)

        self.devices_link = self.home_cache.html.find('li#tabHeader_3', first=True).absolute_links.pop()
        logger.debug("Devices Link: %s" % self.devices_link)
        self.devices_cache = self.request(self.devices_link)

        # Keep track of devices in case they change. This might be overkill and likely to work great.
        # Probably would be safer to restart the process altogether.
        previous = set(self._devices.keys())
        seen = set()

        home = self.home_cache.html.find("div#home", first=True)
        elements = home.find("div.top,div.inbetween,script")
        # Remove the last element that's a script we're not interested in.
        elements.pop()

        def _parse_temperatures(
            e: Element, devices: Dict[str, AqualinkDevice]
        ) -> List[str]:
            # Reads every "div.temps" on the cached home page; ``e`` itself
            # is not inspected.
            temps = self.home_cache.html.find("div.temps")
            sensors = []
            for i in temps:
                name = re.sub(r"(Temp).*$", r"\1", i.text)
                entity = name.lower().replace(' ', '_')
                temp = re.sub(r"^.*Temp", "", i.text)
                if temp == "--":
                    temp = None
                else:
                    temp = int(temp.rstrip("°F"))

                if entity in devices:
                    devices[entity].state = temp
                else:
                    ss = AqualinkSensor(self, name, entity, temp)
                    devices[entity] = ss
                sensors += [entity]
            return sensors

        def _parse_set_temperatures(
            e: Element, devices: Dict[str, AqualinkDevice]
        ) -> List[str]:
            # First, open the sub-page.
            sub = self.request(BASE_URL + (e.links.pop()))
            # Then get the action link to set thermostats. Same for all of them.
            script = sub.html.find('script')[-2]
            action = re.match(r".*actionID=(\w+)&temp.*", script.text).groups()[0]
            control = sub.html.find('div.set_temp_label')
            thermostats = []
            for c in control:
                (name, temp) = c.text.split("\n")
                temp = int(temp.rstrip("°F"))
                entity = c.find('span')[1].attrs['id']
                if entity in devices:
                    devices[entity].state = temp
                    devices[entity].action = action
                else:
                    ts = AqualinkThermostat(self, name, entity, temp, action)
                    devices[entity] = ts
                thermostats += [entity]
            return thermostats

        # Go through all the elements on the page.
        for e in elements:
            if e.tag == 'script':
                continue

            if 'top' in e.attrs['class']:
                # Current Temperatures.
                seen |= set(_parse_temperatures(e, self._devices))
                continue

            if 'inbetween' in e.attrs['class']:
                # Set Temperatures for Pool/Spa heaters.
                # This "Set Temperatures" string seems to be safe to use.
                if e.text == 'Set Temperatures':
                    seen |= set(_parse_set_temperatures(e, self._devices))
                    continue

                # At this point, we're pretty sure it's a toggle.
                # The 'inbetween' element gives us the name/state. The
                # following 'script' element gives the link to flip it.
                pass

        # XXX - This code needs to be made more robust, like the devices page.
        # Now find the switches for pool/spa.
        # This is a bit convoluted but labels, states and scripts don't live
        # in the same element so we need to find all of them individually and
        # put them together.
        labels = home.find("div.inbetween")
        states = home.find("div#home", first=True).find("img")
        scripts = home.find("script")
        labels.pop(0)
        states.pop(0)
        scripts.pop()

        for label, state, script in zip(labels, states, scripts):
            name = label.find('span', first=True).text
            entity = state.attrs['id'].replace('_state', '')
            state = DEVICE_STATE_MAP[state.attrs['src']]
            action = re.match(r".*actionID=(\w+)", script.text).groups()[0]
            if entity in self._devices:
                self._devices[entity].state = state
                self._devices[entity].action = action
            else:
                sw = AqualinkSwitch(self, name, entity, state, action)
                self._devices[entity] = sw
            seen.add(entity)

        # Now go through auxiliary devices. These typically include water
        # features, pool cleaner, lights, ...
        # Here again, we look for labels, states and scripts separately and
        # put them all together.
        devices = self.devices_cache.html.find('div#devices', first=True)
        objs = []
        for e in devices.find('div.inbetween,script'):
            if e.tag == 'div':
                label = e.find('span.row_label', first=True)
                name = " ".join([x.capitalize() for x in label.text.split()])
                entity = label.text.lower().replace(' ', '_')
                state = e.find('img', first=True)
                state = DEVICE_STATE_MAP[state.attrs['src']]

                # Create a Light
                if len(e.links) > 0:
                    # Device is a dimmable light.
                    sub = re.match(r".*actionID=(\w+)", e.links.pop()).groups()[0]
                    # Browse sub-menu. Find the dimming action url.
                    sub_cache = self.request(ACTION_BASE_URL % sub)
                    script = sub_cache.html.find('script')[-1]
                    cur = sub_cache.html.find('span.button-dimmer-selected', first=True)
                    cur = int(cur.attrs['id'].split('_')[-1])
                    action = re.match(r".*actionID=(\w+)&level=0.*", script.text).groups()[0]
                    sw = AqualinkLight(self, name, entity, state, action, brightness=cur)
                    objs += [sw]
            else:
                action = re.match(r".*actionID=(\w+)", e.text).groups()[0]
                # This is script with an action for the previous element, so
                # name/entity/state still hold that element's values.
                # Going to assume that people used sensible names for lights.
                # At least my installer did.
                if 'Light' in name:
                    sw = AqualinkLight(self, name, entity, state, action)
                else:
                    sw = AqualinkSwitch(self, name, entity, state, action)
                objs += [sw]

        for obj in objs:
            entity = obj.entity
            if entity in self._devices:
                self._devices[entity].state = obj.state
                self._devices[entity].action = obj.action
                if isinstance(obj, AqualinkLight) and obj.is_dimmable:
                    self._devices[entity].brightness = obj.brightness
            else:
                self._devices[entity] = obj
            seen.add(entity)

        # Get rid of devices that went away.
        missing = previous - seen
        for i in missing:
            del self._devices[i]

    @property
    def devices(self) -> List[AqualinkDevice]:
        """Snapshot list of the currently known devices."""
        # A real list (as annotated) rather than a live dict view, so a
        # later refresh cannot change it under the caller.
        return list(self._devices.values())
# Script: fetch the ZenTao login page, read its one-time "verifyRand" value and
# build the login form payload (account + salted double-MD5 password).
import hashlib
import os

import requests
from requests_html import HTMLSession


def getMd5(s):
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of ``s``."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


productid = os.getenv("productid", "9")
host = "http://183.131.202.93:9090"
session = HTMLSession()

# The login page embeds a random value in a hidden <input id="verifyRand">.
z = session.request(
    method="get",
    url=host + "/zentao/index.php?m=user&f=login",
)
verifyRand = z.html.find("input#verifyRand")[0].attrs["value"]
print("随机数:%s" % verifyRand)

# Password sent to the server: md5(md5(plain_password) + verifyRand).
pwd = getMd5(getMd5("wujingjing123456") + verifyRand)
print("密码:%s" % pwd)

data = {
    "account": "wujingjing",
    "password": pwd,
    "keepLogin[]": "on",
}
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
}
class CSRFDetector:
    """Heuristic CSRF scanner for the HTML forms found on crawled pages.

    For every distinct ``location`` in ``results['requests']`` the page is
    fetched, forms that are uninteresting or look protected (filtered
    keywords, no submit button, token detected by ``find_token``, captcha
    image) are skipped, and the remaining forms are requested with and
    without a ``referer`` header.  Forms whose responses match according to
    ``compare`` are recorded in the report.

    :param results: crawl results; ``results['requests']`` maps to dicts
        that carry a ``'location'`` URL.
    :param reports: list that receives this detector's report dict once
        :meth:`exec` has finished.
    :param kwargs: extra options; ``cookie`` (a cookie header string) is
        loaded into the HTTP session when present and non-empty.
    """

    # Image extensions treated as ordinary pictures (i.e. not a captcha).
    IMG_SUFFIXES = ('jpg', 'png', 'jpeg', 'ico', 'gif', 'bmp')
    # Placeholder keywords marking forms that are not worth testing.
    FILTER_WORDS = ('跳', '搜', '查', '找', '登陆', '注册', 'search', 'register', 'login', 'log in', 'sign')
    # Substrings of a form action that mark the form as not worth testing.
    ACTION_FILTERS = ('search', 'find', 'login', 'reg', 'logout',
                      'baidu.com', 'google.com', 'so.com', 'bing.com', 'soso.com', 'sogou.com')

    def __init__(self, results, reports, **kwargs):
        report = {'title': 'CSRF Detector',
                  'overview': "CSRF vulnerabilities is an attack that forces an end user to execute "
                              "unwanted actions on a web application in which they're currently authenticated."
                              "<br> This Detector can automatic detect CSRF Token "
                              "and try to detect 'Referer' header check."
                              "If any following form is important, please consider adding protect.",
                  'entries': [],
                  'header': ['URL', 'Method', 'Data', 'Request with referer', 'Request without referer']}
        self.results = results
        self.reports = reports
        self.csrf_report = report
        self._session = HTMLSession()
        # Per-instance copies so callers may still mutate these lists.
        self.img_suffix = list(self.IMG_SUFFIXES)
        self.filter_words = list(self.FILTER_WORDS)
        self.action_filters = list(self.ACTION_FILTERS)
        self.args = kwargs
        if self.args.get('cookie'):
            parsed = SimpleCookie()
            parsed.load(self.args['cookie'])
            # Values are Morsel objects, handed to the session cookie jar as-is.
            self.cookie = dict(parsed.items())
            for key, morsel in self.cookie.items():
                self._session.cookies.set(key, morsel)

    @staticmethod
    def meta():
        """Return the detector's name and version."""
        return {
            'name': 'CSRF Detector for all',
            'version': '1.0'
        }

    def find_recaptcha(self, form):
        """Guess whether ``form`` contains a captcha image.

        An ``<img>`` whose ``src`` (query string removed) ends in a short,
        well-known picture extension is considered an ordinary image; any
        other image with a non-empty ``src`` is considered a captcha.

        :param form: element exposing ``find('img')``.
        :return: ``True`` as soon as a captcha-like image is found, ``False``
            when every image looks ordinary or there are no images.
        """
        for img in form.find('img'):
            src = img.attrs.get('src', '')
            if not src:
                continue
            path = src.split('?')[0]
            suffix = path.split('/')[-1].split('.')[-1]
            logging.debug("img suffix: {}".format(suffix))
            if len(suffix) < 5 and any(known in suffix for known in self.img_suffix):
                # Ordinary picture: keep looking, a captcha may follow a logo.
                logging.info("non recaptcha img in form detected: {}".format(img.attrs['src']))
                continue
            logging.info("recaptcha detected: {}".format(img.attrs['src']))
            return True
        return False

    @staticmethod
    def _summarize_response(response):
        """Return ``'<redirect chain><status> <body or body length>'``."""
        chain = ''.join('{}->'.format(hop.status_code) for hop in response.history)
        body = response.text
        shown = body if len(body) < 150 else 'data length: {}'.format(len(body))
        return '{}{} {}'.format(chain, response.status_code, shown)

    def report(self, url, method, data, req, req_no_ref):
        """Append one finding to the report.

        :param url: form target URL.
        :param method: HTTP method of the form.
        :param data: form data, rendered with ``urlencode``.
        :param req: response obtained with a referer header.
        :param req_no_ref: response obtained without a referer header.
        """
        entry = [url, method, urlencode(data),
                 self._summarize_response(req), self._summarize_response(req_no_ref)]
        self.csrf_report['entries'].append(entry)

    def _has_filtered_placeholder(self, form):
        """Return True when any placeholder in ``form`` contains a filter word."""
        for field in form.find('[placeholder]'):
            placeholder = field.attrs['placeholder'].lower()
            if any(word in placeholder for word in self.filter_words):
                logging.debug('keyword in placeholder, skip {}'.format(form))
                return True
        return False

    def _should_skip_form(self, form, form_index, location):
        """Return True when ``form`` is uninteresting or looks protected."""
        # Skip useless forms by black list.
        action = form.attrs.get('action', location).lower()
        if any(keyword in action for keyword in self.action_filters):
            # TODO: identify the form by an id in this message?
            logging.debug('keyword in action, skip {}:'.format(form))
            return True
        if self._has_filtered_placeholder(form):
            return True
        # Skip forms without a submit button.
        if not form.find('input[type="submit"], button[type="submit"]'):
            return True
        # CSRF token check: fetch the page again and let find_token compare
        # the same form across the two fetches.
        if form.find(':hidden'):
            forms_again = self._session.get(location).html.find('form')
            # The second fetch may contain fewer forms; then no comparison
            # is possible and the form falls through to the referer check.
            if form_index < len(forms_again) and find_token(form, forms_again[form_index]):
                return True
        if form.find('img') and self.find_recaptcha(form):
            return True
        return False

    def _check_referer(self, form, location):
        """Request the form target with and without a referer and record a hit."""
        # NOTE(review): ``data`` is only used for the report; it is not sent
        # with the probe requests below - confirm whether that is intended.
        data = craft_field(form, location)
        url = urljoin(location, form.attrs['action']) if 'action' in form.attrs else location
        method = form.attrs['method'].upper() if 'method' in form.attrs else 'GET'

        req1 = self._session.request(method, url, headers={'referer': location})
        req_no_ref = self._session.request(method, url)
        req2 = self._session.request(method, url, headers={'referer': location})

        # Make sure the "no referer" probe really went out without one; this
        # must look at the request that was sent, not at the response headers.
        sent_headers = req_no_ref.request.headers
        logging.debug('assert no referer: {}'.format(sent_headers))
        assert 'referer' not in sent_headers

        if compare(req1, req_no_ref):
            # Two identical requests must also match, otherwise the page is
            # too unstable for the comparison to mean anything.
            if not compare(req1, req2):
                logging.critical('Double check failed: {}\ndata2: {}'.format(req1.text, req2.text))
                return
            logging.critical(
                'CSRF Detected: text length:{}, without referer: text length:{}'.format(
                    len(req1.text), len(req_no_ref.text)))
            self.report(url, method, data, req1, req_no_ref)
        else:
            logging.warning(
                'Referer check detected: text length:{}, without referer: text length:{}'.format(
                    len(req1.text), len(req_no_ref.text)))

    def exec(self):
        """Scan every distinct location and append the report to ``reports``."""
        visited = set()
        for form_ in self.results['requests'].values():
            location = form_['location']
            if location in visited:  # should this be a prefix (startswith) match?
                continue
            visited.add(location)
            res = self._session.get(location)  # render ??
            for form_index, form in enumerate(res.html.find('form')):
                if self._should_skip_form(form, form_index, location):
                    continue
                self._check_referer(form, location)

        self.csrf_report['overview'] = \
            'Found {} possible csrf forms. <br>'.format(len(self.csrf_report['entries'])) + \
            self.csrf_report['overview']
        self.reports.append(self.csrf_report)
        logging.info("CSRF scan finished !")