def get_index_ec(self):
    """Fetch the selective-course page and pull the course data out of it.

    Returns:
        A ``(url_code, courses)`` tuple. ``url_code`` holds every value
        captured between ``xkkh=`` and the following ``&`` in the page;
        ``courses`` is whatever ``parse_standard_table`` yields for the
        ``"kcmcgrid"`` table.
    """
    page_html = self._get_html(get_url("selective_course"))
    # Validate the page before trying to parse anything out of it.
    not_error_page(page_html)
    course_rows = parse_standard_table(page_html, "kcmcgrid", pure=True)
    course_codes = re.findall(r'xkkh=(.*?)&', page_html)
    return course_codes, course_rows
def get_course_detail(self, url_code):
    """Fetch the detail page of one course and parse its class table.

    Args:
        url_code: The ``xkkh`` value identifying the course.

    Returns:
        A ``(result, classroom_selection)`` tuple: the rows parsed from the
        ``"xjs_table"`` table, and the ``value="..."`` attributes captured
        by the classroom-selection regex.
    """
    query_template = "xkkh=%s&xh=%s"
    query_values = (url_code, self._info["xh"])
    detail_url = get_url("course_detail", query_template)
    page_html = self._get_html(detail_url, query_values)
    not_error_page(page_html, self.uid)
    selection_values = re.findall(r'value="(.*?)".*?xkkh', page_html)
    table_rows = parse_standard_table(page_html, "xjs_table", pure=True)
    return table_rows, selection_values
def get_checkout_book(self, uid):
    """Return the rows of the checkout-book table for the given user.

    Args:
        uid: Identifier passed to ``init_from_redis`` to restore the
            session cookies before the request is made.

    Returns:
        The rows parsed from the table with class ``"table_line"``.
    """
    self.init_from_redis(uid, only_cookies=True)
    response = safe_get(
        self.checkout_book_url,
        headers=self.headers,
        cookies=self.cookies,
    )
    page_html = response.content.decode("utf-8")
    return parse_standard_table(page_html, class_name="table_line", pure=True)
def get_consume(self):
    """Collect consumption records across all result pages.

    The first page is requested with a POST carrying the date range from
    ``self.cal_date()``; every following page is requested with a GET on
    ``?opertype=page&page=<n>``, starting at page 2.

    Paging stops when either

    * fewer than 10 rows have been collected in total (the threshold is
      presumably the server's page size -- TODO confirm), or
    * a request contributed no new rows.

    The previous stop condition additionally required the prior total to
    differ from 10, so a result set of exactly 10 rows followed by empty
    pages never terminated. That extra guard has been removed.

    Returns:
        A list with the rows of every page, in page order.
    """
    dates = self.cal_date()
    data = {
        "opertype": "query",
        "startDate": dates[1],
        "endDate": dates[0],
        "input": "查询",
    }
    consume = []
    index = 1
    while True:
        if index == 1:
            response = safe_post(
                self.consume_url,
                data=data,
                headers=self.headers,
                cookies=self.cookies,
            )
        else:
            response = safe_get(
                self.consume_url + "?opertype=page&page=%s" % index,
                headers=self.headers,
                cookies=self.cookies,
            )
        index += 1

        html = response.content.decode("utf-8")
        previous_count = len(consume)
        consume.extend(parse_standard_table(html, pure=True))
        count = len(consume)

        # Stop on a short overall result or as soon as a page adds nothing;
        # comparing totals (not the page itself) keeps this independent of
        # the concrete iterable type returned by the parser.
        if count < 10 or count == previous_count:
            break
    return consume
def search(self, keyword):
    """Run an OPAC search and gather the results of every result page.

    Args:
        keyword: Search term, interpolated verbatim into the query string.

    Returns:
        A ``(books, url_code)`` tuple: the rows parsed from each page's
        ``"result_content"`` table, and the ``marc_no`` values extracted
        from each page by ``safe_re``.
    """
    url_template = (
        self.opac_url
        + "search_adv_result.php?sType0=any&q0=%s&with_ebook=&page=%s"
    )

    def fetch(page_no):
        # Download one result page and decode it as UTF-8 text.
        response = safe_get(url_template % (keyword, page_no),
                            headers=self.headers)
        return response.content.decode("utf-8")

    html = fetch(1)
    tree = etree.HTML(html)
    total_node = tree.xpath(u"//div[@class='box_bgcolor']/font[last()]")[0]
    books_num = int(total_node.text)
    # The page count is derived from the reported total at 20 hits per page.
    last_page = math.ceil(books_num / 20)

    books = []
    url_code = []
    page_no = 1
    while True:
        books.extend(parse_standard_table(html, "result_content", pure=True))
        url_code.extend(safe_re('marc_no=(.*)"', html))
        page_no += 1
        if page_no > last_page:
            break
        html = fetch(page_no)
    return books, url_code
def get_elective_course(self, category):
    """Collect the elective courses of one category across all pager pages.

    The method loads the selective-course page, posts the form once with
    ``Button2`` set, then repeatedly posts it with ``zymc`` set to the
    GB2312-encoded ``category + "|院公选课5"`` while following the pager
    controls found in each response.

    Args:
        category: Category name; concatenated with ``"|院公选课5"`` to form
            the ``zymc`` form field.

    Returns:
        A ``(url_code, courses)`` tuple: every ``xkkh`` value found in the
        visited pages, and the rows parsed from each page's ``"kcmcgrid"``
        table.
    """
    # Keep requesting the entry page until the response no longer contains
    # the "three-second anti-refresh" marker text.
    while True:
        url = get_url("selective_course")
        html, url = self._get_html(url, only_html=False)
        if "三秒防刷" in html:
            print("\n检测到三秒防刷,程序将睡眠三秒钟。")
            time.sleep(3)
        else:
            break
    viewstate = get_viewstate(html)
    post_data = {
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "__VIEWSTATE": viewstate,
        "zymc": "",
        "xx": "",
        "Button2": "选修课程"
    }
    # First post: submit the form with Button2 present.
    html = safe_post(url=url, data=post_data, headers=self._headers,
                     cookies=self._cookies).text
    # Later posts carry the category filter instead of the button.
    post_data["zymc"] = (category + "|院公选课5").encode("gb2312")
    del post_data["Button2"]
    num = 0  # total record count, read once from the page text
    current_page = 1
    courses = []
    url_code = []
    page_num = []  # queue of pager control names still to be posted
    while True:
        try:
            # Each post must carry the view state of the previous response.
            post_data["__VIEWSTATE"] = get_viewstate(html)
            html = safe_post(url=url, data=post_data, headers=self._headers,
                             cookies=self._cookies).text
            _t = parse_standard_table(html, "kcmcgrid", pure=True)
            courses.extend(_t)
            _t = re.findall(r'xkkh=(.*?)&', html)
            url_code.extend(_t)
            if num == 0:
                # re.search returns None when the record-count text is
                # missing; .group then raises the AttributeError handled
                # below.
                num = int(re.search(r'共(\d*)条记录!', html).group(1))
            if (current_page % 10) == 1:
                # Pages 1, 11, 21, ...: refresh the queue of pager controls
                # from the current response.
                page_num = re.findall(r'kcmcgrid\$_ctl\d*\$_ctl\d*', html)
                if (current_page // 10) == 1:
                    # Only reached on page 11. Presumably drops pager links
                    # that point at pages already visited -- TODO confirm
                    # the derivation of _n against the real pager markup.
                    _n = 10 - (num // 10 - 9)
                    page_num = [
                        page_num[i] for i in range(len(page_num)) if i > _n
                    ]
            if len(page_num) == 0:
                # No pager controls left: every page has been fetched.
                break
            current_page += 1
            # The pager control name is posted with "$" replaced by ":".
            post_data["__EVENTTARGET"] = page_num.pop(0).replace("$", ":")
        except AttributeError:
            # Treated as the anti-refresh page: wait and retry.
            # NOTE(review): at this point html already holds the response
            # that failed, and any rows/codes parsed from it were appended
            # above; the retry derives __VIEWSTATE from that response --
            # verify this cannot loop forever or duplicate entries.
            print("\n检测到三秒防刷,程序将睡眠三秒钟。")
            time.sleep(3)
    return url_code, courses
def get_book_detail(self, marc_no):
    """Return the rows of the holdings table for one catalogue record.

    Args:
        marc_no: Record identifier appended to the ``item.php`` URL.

    Returns:
        The rows parsed from the ``"item"`` table of the record page.
    """
    item_url = self.opac_url + "item.php?marc_no=" + marc_no
    response = safe_get(item_url, headers=self.headers)
    page_html = response.content.decode("utf-8")
    return parse_standard_table(page_html, "item", pure=True)