def get_news_content():
    """Return the paragraphs and images of one news article as JSON.

    Expects a JSON request body with:
      - "mode": "0" selects the news site, anything else the jwc site
      - "contentUrl": article path relative to the chosen host

    Returns JSON {'content': [...]} where each item is either
    {"text": ..., "type": 0} or {"url": ..., "type": 1}.
    """
    # NOTE: the original reused one `content` name for both the parsed
    # request body and the output list; the input is now `payload`.
    payload = json.loads(request.data.decode())
    host = ('http://news.ahut.edu.cn' if payload["mode"] == "0"
            else 'http://jwc.ahut.edu.cn')
    html = ScrapyPage(host + payload["contentUrl"])
    selector = etree.HTML(html)
    paragraph = selector.xpath("//div[@id='vsb_content']/p")
    if not paragraph:
        # Some article pages use a suffixed container id instead.
        paragraph = selector.xpath("//div[@id='vsb_content_501']/p")
    content = []
    for p in paragraph:
        text = p.xpath("string(.)").strip().replace("\r\n", "").replace(" ", '')
        if text:
            content.append({"text": text, "type": 0})
        for img in p.xpath(".//img"):
            src = img.xpath("./@src")
            # Skip malformed <img> tags without a src attribute instead of
            # raising IndexError (the original crashed on src-less images).
            if src:
                content.append({"url": host + "/" + src[0], "type": 1})
    return json.dumps({'content': content})
def get_news_list():
    """Scrape one page of the news list and return its items plus the
    relative URL of the next page ('-1' when there is none).

    Expects a JSON request body with "mode" ("0" = news site, else jwc)
    and "nextPageUrl" (query suffix for list.jsp).
    """
    body = json.loads(request.data.decode())
    mode = body["mode"]
    if mode == "0":
        static_url = 'http://news.ahut.edu.cn/list.jsp'
        link_class = "c1022"
    else:
        static_url = 'http://jwc.ahut.edu.cn/list.jsp'
        link_class = "c44456"
    selector = etree.HTML(ScrapyPage(static_url + body['nextPageUrl']))
    anchors = selector.xpath("//a[@class='" + link_class + "']")
    next_hrefs = selector.xpath("//a[@class='Next']/@href")
    next_page = next_hrefs[0] if next_hrefs else '-1'
    items = []
    for anchor in anchors:
        raw_time = anchor.xpath("../following-sibling::td[1]/text()")[0]
        # The two sites wrap the date differently: news drops the last
        # character; jwc drops one leading and two trailing characters.
        time_text = raw_time[:-1] if mode == "0" else raw_time[1:-2]
        items.append({
            "mode": mode,
            "href": anchor.xpath("./@href")[0],
            "title": anchor.xpath("./text()")[0].strip(),
            "time": time_text,
        })
    return json.dumps({"news": items, 'nextPageUrl': next_page})
def search_books():
    """Search the library OPAC and return the parsed result list as JSON.

    Query params: strText (search terms), strSearchType (field to search).
    The parsed page is turned into JSON by the project helper get_books().
    """
    from urllib.parse import quote  # local: file's import header not in view
    static_url = ('http://10.100.101.10:8080/opac/openlink.php?historyCount=0'
                  '&doctype=ALL&match_flag=forward&displaypg=20&sort=CATA_DATE'
                  '&orderby=desc&showmode=list&dept=ALL')
    # Default to '' so a missing param doesn't raise TypeError on concat,
    # and URL-encode the user-supplied values so spaces/CJK text and '&'
    # can't corrupt the query string.
    search_string = request.args.get('strText', '')
    search_type = request.args.get('strSearchType', '')
    static_url += ("&strText=" + quote(search_string)
                   + "&strSearchType=" + quote(search_type))
    selector = etree.HTML(ScrapyPage(static_url))
    return json.dumps(get_books(selector))
def get_school_calendar():
    """Scrape the jwc calendar index and return, for each calendar entry,
    its title and the absolute URLs of its page images as JSON."""
    base_url = "http://jwc.ahut.edu.cn"
    static_url = base_url + '/list.jsp?urltype=tree.TreeTempUrl&wbtreeid=1109'
    html = ScrapyPage(static_url)
    selector = etree.HTML(html)
    calendars = selector.xpath("//a[@class='c44456']")
    data = []
    for calendar in calendars:
        # Fetch and parse the per-calendar detail page.
        detail = etree.HTML(
            ScrapyPage(base_url + calendar.xpath("./@href")[0].strip()))
        images = []
        for paragraph in detail.xpath("//div[@id='vsb_content']/p"):
            srcs = paragraph.xpath("./img/@src")
            # Text-only paragraphs have no <img>; skip them instead of
            # raising IndexError (the original crashed on those).
            if srcs:
                images.append(base_url + "/" + srcs[0])
        data.append({
            "name": calendar.xpath("./@title")[0].strip(),
            "images": images,
        })
    return json.dumps({"data": data})
def get_week():
    """Return the current teaching week scraped from the jxz page.

    Returns JSON {'code': 1, 'week': ...} on success, or {'code': -1}
    when the fetch or parse fails.
    """
    try:
        html = ScrapyPage('http://211.70.149.139:84/jxz.aspx')
        selector = etree.HTML(html)
        tr = selector.xpath("//table/tr")[1]
        week = tr.xpath("./td/font/text()")[0].strip()
        return json.dumps({'code': 1, 'week': week})
    except Exception:
        # Network, parse and layout failures all map to the error code.
        # `except Exception` (not bare `except:`) so SystemExit and
        # KeyboardInterrupt still propagate.
        return json.dumps({'code': -1})
def get_book_detail():
    """Return detail fields, cover-image URL and holdings for one book.

    Query param: book_id (the OPAC marc_no).
    Returns JSON {'info': [...], 'book_img': ..., 'borrow_info': [...]}.
    """
    static_url = 'http://10.100.101.10:8080/opac/item.php'
    book_id = request.args.get("book_id")
    static_url += ("?marc_no=" + book_id)
    selector = etree.HTML(ScrapyPage(static_url))
    # Last two <dl> entries are page chrome, not book metadata.
    dls = selector.xpath("//div[@id='item_detail']/dl")[:-2]
    res = []
    isbn = None
    for dl in dls:
        title = dl.xpath("./dt/text()")[0]
        content = dl.xpath("./dd")[0].xpath("string(.)")
        if 'ISBN' in title:
            # ISBN field looks like "<isbn>/<price>"; strip hyphens for the
            # douban cover lookup below.
            isbn = content.split('/')[0].replace('-', '')
        res.append({'title': title, 'content': content})
    borrow_info = []
    for row in selector.xpath("//table[@id='item']/tr")[1:]:
        td = row.xpath("./td")
        try:
            borrow_info.append({
                'index': td[0].xpath("./text()")[0],
                'tiao_ma': td[1].xpath("./text()")[0],
                'xiao_qu': td[3].xpath("./text()")[0].strip(),
                'status': td[4].xpath("string(.)"),
            })
        except IndexError:
            # Summary/malformed rows lack the expected cells; skip them.
            continue
    # The original referenced `isbn` unconditionally, raising NameError when
    # no ISBN field was present; degrade to an empty cover URL instead.
    book_img = ''
    if isbn:
        book_img = json.loads(
            ScrapyPage('http://10.100.101.10:8080/opac/ajax_douban.php?isbn='
                       + isbn))['image']
    return json.dumps({
        'info': res,
        'book_img': book_img,
        'borrow_info': borrow_info,
    })
def check_user():
    """Exchange a WeChat login code for an openid and look up the binding.

    Expects a JSON body with "code" (the wx.login js_code).
    Returns JSON with code '1' (already bound; session populated and
    current notifications attached), '0' (valid openid, not yet bound;
    openid stashed in the session), or '-1' on any failure.
    """
    login_code = json.loads(request.data.decode())
    url = ('https://api.weixin.qq.com/sns/jscode2session?appid='
           + wx_config['appid'] + '&secret=' + wx_config['secret']
           + '&grant_type=authorization_code&js_code=' + login_code['code'])
    try:
        open_id = json.loads(ScrapyPage(url))['openid']
        bind = UserBind.query.filter(UserBind.openid == open_id).first()
        if bind:
            session['user_number'] = bind.number
            session['user_type'] = bind.identity_type
            return json.dumps({
                'code': '1',
                'msg': '',
                'user_type': bind.identity_type,
                'user_number': bind.number,
                'notification': init_notification()
            })
        session['open_id'] = open_id
        return json.dumps({'code': '0', 'msg': ''})
    except Exception:
        # `except Exception` rather than the original bare `except:` so
        # SystemExit/KeyboardInterrupt are not swallowed; WeChat API,
        # network and JSON failures still map to the error response.
        return json.dumps({'code': '-1', 'msg': '用户验证失败...'})
def search_next_page():
    """Fetch the next OPAC result page named in the request body and
    return it parsed by get_books() as JSON.

    Expects a JSON body with "next_page" (query suffix for openlink.php).
    """
    payload = json.loads(request.data.decode())
    page_url = 'http://10.100.101.10:8080/opac/openlink.php' + payload["next_page"]
    html = ScrapyPage(page_url)
    return json.dumps(get_books(etree.HTML(html)))