def geteasy():
    """Log in to the portal and warm up the authenticated session.

    Performs a GET on the portal index page with the cookies obtained from
    ``login()`` so that subsequent requests behave as a logged-in session.

    :return: ``{'cookies': <CookieJar>}`` -- the authenticated cookies.
    """
    login_re = login()
    cookies = login_re['cookies']
    url = Base.academicurl() + 'index.portal'
    # NOTE(review): this query dict is built but never sent with the request
    # below (no ``params=param``) -- confirm whether it should be passed,
    # as the sibling ``easy_detail`` does with an identical-looking dict.
    param = {
        ".f": "f3758",
        ".pmn": "view",
        "groupid": "all",
        "action": "bulletinsMoreView",
        "search": "true",
        ".ia": "false",
        ".pen": "pe1713",
    }
    headers = {
        "Host": "myportal.sxu.edu.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Referer": "http://myportal.sxu.edu.cn/index.portal",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    # The portal needs an extra GET on the index page before the session
    # reaches the home page ("request twice to get to the main page").
    # Fixed: removed debug print(cookies) calls and the unused ``rep`` binding.
    requests.get(url=url, headers=headers, cookies=cookies)
    return {'cookies': cookies}
def verification():
    """Fetch a captcha, OCR it, and submit it to the portal for validation.

    Keeps requesting fresh captchas until OCR yields a non-empty string,
    then validates that string against the portal's AJAX endpoint.

    :return: ``{'rep': <response body text>, 'cookies': <CookieJar>}``
    """
    code = getcode()  # captcha image OCR result + session cookies
    # Retry until the OCR step actually recognised something.
    while not code['yzm']:
        code = getcode()
    validate_url = Base.academicurl() + 'captchaValidate.portal'
    query = {
        "captcha": code["yzm"],
        "what": "captcha",
        "value": code["yzm"],
        "sf_request_type": "ajax",
    }
    request_headers = {
        "Host": "myportal.sxu.edu.cn",
        "User-Agent": "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Accept": "text/javascript, text/html, application/xml, text/xml, */*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Referer": "http://myportal.sxu.edu.cn/"
    }
    response = requests.get(url=validate_url, params=query,
                            cookies=code["cookies"], headers=request_headers)
    return {'rep': response.text, 'cookies': code["cookies"]}
def jw_yzm():
    """Download the academic-office captcha image plus its session cookies.

    :return: dict with ``cookies`` (plain cookie dict) and ``image``
             (a ``data:image/jpeg;base64,...`` data URI).
    """
    captcha_url = Base.academicurl() + '/sys/ValidateCode.aspx?t=636'
    request_headers = {
        'Host': 'bkjw.sxu.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
        'Accept': 'image/webp,*/*',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'http://bkjw.sxu.edu.cn/_data/login.aspx',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    response = requests.get(url=captcha_url, headers=request_headers)
    # Inline the raw JPEG bytes as a base64 data URI for the frontend.
    encoded = base64.b64encode(response.content).decode()
    return {
        'cookies': requests.utils.dict_from_cookiejar(response.cookies),
        'image': 'data:image/jpeg;base64,%s' % encoded,
    }
def __init__(self, cookies_dict, usesname):
    """Prepare session cookies and a dated per-user output directory.

    :param cookies_dict: plain cookie dict from a previous login; kept both
        as-is and converted to a CookieJar for requests calls.
    :param usesname: user identifier used as the leaf directory name.
    """
    self.jw_url = Base.academicurl()
    self.path = Base.kaochanpath()
    self.cookies = requests.utils.cookiejar_from_dict(cookies_dict)
    self.cookies_dict = cookies_dict
    # NOTE(review): gmtime() is UTC, so near midnight this date can differ
    # from the local date -- confirm whether localtime() was intended.
    self.time_path = time.strftime("%Y-%m-%d", time.gmtime())
    self.usesname = usesname
    self.dir_path = self.path + '/' + self.time_path + "/" + str(usesname)
    # Fixed: replaced the race-prone isdir()/pass/else dance with the
    # idiomatic exist_ok flag.
    os.makedirs(self.dir_path, exist_ok=True)
def getcode():
    """Download a portal captcha image and OCR it.

    :return: ``{'cookies': <CookieJar>, 'yzm': <OCR'd captcha string>}``
    """
    captcha_url = Base.academicurl() + 'captchaGenerate.portal'
    request_headers = {
        "Host": "myportal.sxu.edu.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Accept": "image/webp,*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Referer": "http://myportal.sxu.edu.cn/"
    }
    image_response = requests.get(url=captcha_url, headers=request_headers)
    # Recognise the characters in the raw image bytes.
    recognised = Base.codeOcr(image_response.content)
    return {'cookies': image_response.cookies, 'yzm': recognised}
def login(username="201702701159", password="zl467548"):
    """Log in to the student portal after the captcha check has passed.

    :param username: portal account number.  The default keeps the previous
        hardcoded behaviour -- TODO(review): move these credentials out of
        source control into configuration.
    :param password: portal password (same caveat as ``username``).
    :return: ``{'cookies': <CookieJar>}`` -- captcha-session cookies merged
        with the cookies set by the login response.
    :raises uererror: when the portal reports the user does not exist.
    """
    verification_re = verification()
    # An empty validation response body means the captcha was accepted;
    # keep retrying with fresh captchas until that happens.
    while verification_re['rep']:
        verification_re = verification()
    url = Base.academicurl() + 'userPasswordValidate.portal'
    # Fixed: dropped the stale hardcoded "Content-Length: 173" header so
    # requests computes the correct length for the actual payload.
    headers = {
        "Host": "myportal.sxu.edu.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Content-Type": "application/x-www-form-urlencoded",
        "Origin": "http://myportal.sxu.edu.cn",
        "Connection": "keep-alive",
        "Referer": "http://myportal.sxu.edu.cn/index.portal",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    data = {
        "Login.Token1": username,
        "Login.Token2": password,
        # Fixed: the original value contained stray spaces
        # ("http: // myportal.sxu.edu.cn / loginSuccess.portal") and could
        # never have been a valid redirect URL.
        "goto": "http://myportal.sxu.edu.cn/loginSuccess.portal",
        "gotoOnFail": "http://myportal.sxu.edu.cn/loginFailure.portal",
    }
    rep = requests.post(url=url, data=data, headers=headers,
                        cookies=verification_re['cookies'])
    if '用户不存在' in rep.text:
        # Project-defined error; the name looks misspelled ("uererror") --
        # confirm against its definition elsewhere in the project.
        raise uererror
    # Merge the captcha-session cookies with those set by the login response
    # (login-response values win on key collision).
    cookies_j = requests.utils.dict_from_cookiejar(verification_re['cookies'])
    cookies_i = requests.utils.dict_from_cookiejar(rep.cookies)
    cookies = requests.utils.cookiejar_from_dict(
        dict(cookies_j, **cookies_i), cookiejar=None, overwrite=True)
    return {'cookies': cookies}
def easy_detail(cookies, sort, f):
    """Scrape new (unread) bulletins of one category and persist them.

    Walks the bulletin list for portlet ``pe<sort>`` from oldest to newest,
    collects entries not yet seen (compared against the last-seen bulletin id
    stored via get_json/save_json), then fetches each bulletin's detail page
    and saves it to the database through the matching ``Essay<sort>`` model.

    :param cookies: authenticated CookieJar for the portal session.
    :param sort: portlet/category number as a string (e.g. "1713").
    :param f: the portal's ".f" form id suffix as a string.
    """
    url = Base.academicurl() + 'detach.portal'
    param = {
        ".f": "f" + f,
        ".pmn": "view",
        "groupid": "all",
        "action": "bulletinsMoreView",
        "search": "true",
        ".ia": "false",
        ".pen": "pe" + sort,
    }
    headers = {
        "Host": "myportal.sxu.edu.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Referer": "http://myportal.sxu.edu.cn/index.portal",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    rep = requests.get(url=url, headers=headers, params=param, cookies=cookies)
    soup_index = BeautifulSoup(rep.text, 'lxml')
    tag_li_list = soup_index.find_all('li')
    easy_list = []
    old_easy = get_json()  # last-seen bulletin id per category, keyed by `sort`
    index = -1
    new_first_bulletinId = old_easy[sort]
    # Iterate the list reversed so bulletins are visited oldest-first.
    for tag_li in tag_li_list[::-1]:
        # Skip bulletins the portal already marks as read.
        if "item-readed" in tag_li.get('class'):
            continue
        tag_a = tag_li.find_all('a')[1]
        title = tag_a.get_text()
        tag_span = tag_li.find('span', attrs={'style': "float:left;"})
        time_ = tag_span.get_text().strip()
        href = tag_a['href']
        # Bulletin id is the value of the last query parameter in the link.
        bulletinId = href.split('&')[-1].split('=')[1]
        index += 1
        # Remember the first unread bulletin's id; it becomes the new
        # "last seen" marker once we hit the previously stored one.
        if index == 0:
            new_first_bulletinId = bulletinId
        if old_easy[sort] == bulletinId:
            # Reached the bulletin we stopped at last run: advance the
            # marker, persist it, and stop collecting.
            # NOTE(review): if the stored id is never encountered (all
            # bulletins are new), save_json is never called -- confirm
            # whether the marker should also be updated in that case.
            old_easy[sort] = new_first_bulletinId
            save_json(old_easy)
            break
        else:
            easy_list.append({
                'title': title,
                'href': href,
                'bulletinId': bulletinId,
                'time': time_
            })
    # Fetch each collected bulletin's detail page and store it.
    for easy in easy_list:
        easy_url = Base.academicurl() + 'detach.portal?' + easy['href']
        easy_rep = requests.get(url=easy_url, headers=headers, cookies=cookies)
        soup_easy = BeautifulSoup(easy_rep.text, 'lxml')
        # The content div id embeds the category, e.g. "bulletin-contentpe1735".
        tag_content = soup_easy.find('div', attrs={'id': 'bulletin-contentpe' + sort})
        esay_content = tag_content.get_text()
        tag_title = soup_easy.find('div', attrs={'class': 'bulletin-title'})
        bulletinId = easy['bulletinId']
        # HACK: eval() resolves the model class by name ("Essay" + sort).
        # `sort` comes from our own callers, not user input, but a dict
        # lookup of model classes would be safer -- flagged for review.
        new_user = eval('Essay' + sort)(essayid=bulletinId, title=tag_title.get_text().strip(), content=esay_content.strip(), time=easy['time'])
        # Add to the session:
        Session.add(new_user)
        # Commit, i.e. persist to the database:
        Session.commit()
        # Close the session:
        Session.close()
        print(tag_title.get_text().strip())
def get_curriculum(cookies_dict, data):
    """Fetch and parse the day-timetable report from the academic office.

    :param cookies_dict: plain cookie dict of a logged-in session.
    :param data: form payload for ``KBFB_DayJCSel_rpt.aspx``.
    :return: dict with keys ``status`` (bool), ``image`` (list of row dicts
        on success), ``mes`` (error message), and ``cookies`` (the input
        cookie dict, echoed back).
    """
    cookies = requests.utils.cookiejar_from_dict(cookies_dict)
    jwurl = Base.academicurl()
    url = jwurl + '/ZNPK/KBFB_DayJCSel_rpt.aspx'
    headers = {
        'Host': 'bkjw.sxu.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': '174',
        'Origin': 'http://bkjw.sxu.edu.cn',
        'Connection': 'keep-alive',
        'Referer': 'http://bkjw.sxu.edu.cn/ZNPK/KBFB_DayJCSel.aspx',
        'Upgrade-Insecure-Requests': '1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    rep = requests.post(url=url, headers=headers, cookies=cookies, data=data)
    soup = BeautifulSoup(rep.text, 'lxml')
    tables = soup.find_all('table')
    if not tables:
        # No tables at all: the captcha/session was rejected.
        # Fixed: this branch used to return the CookieJar object under
        # 'cookies' while every other branch returns the plain dict.
        return {
            'status': False,
            'image': [],
            'mes': '验证码错误',
            'cookies': cookies_dict
        }
    fields = ('num', 'course', 'classnum', 'peoplenum',
              'teacher', 'session', 'location', 'classname')
    curriculumList = []
    if len(tables) == 2:
        # The report table has no usable <tr> structure; every 8 <td>
        # cells form one logical row.  Drop the first group (header row).
        cells = tables[1].find_all('td')
        groups = [cells[i:i + 8] for i in range(0, len(cells), 8)][1:]
        blank = dict.fromkeys(fields, '')
        for group in groups:
            previous = curriculumList[-1] if curriculumList else blank
            # An empty cell means "same as the row above" (merged cell),
            # so fall back to the previous row's value field by field.
            curriculumList.append({
                name: (cell.text or previous[name])
                for name, cell in zip(fields, group)
            })
    # Tables present but not exactly two: return an empty row list
    # (same success shape the original produced for that case).
    return {
        'status': True,
        'image': curriculumList,
        'mes': '',
        'cookies': cookies_dict
    }
def __init__(self, cookies_dict, usesname):
    """Keep the session cookies (dict and CookieJar form) and user name.

    :param cookies_dict: plain cookie dict from a previous login.
    :param usesname: user identifier this instance operates on.
    """
    self.jw_url = Base.academicurl()
    self.cookies_dict = cookies_dict
    self.cookies = requests.utils.cookiejar_from_dict(cookies_dict)
    self.usesname = usesname
def _jw_login_attempt(cookies_dict, username, password, yzm, event_validation):
    """Run one login POST against the academic office and parse the result.

    :param event_validation: the ASP.NET __EVENTVALIDATION token to send.
    :return: the same result dict shape as ``check_jwyzm``.
    """
    cookies = requests.utils.cookiejar_from_dict(cookies_dict)
    chuli = Land_chuli(username=username, password=password, yzm=yzm)
    url = Base.academicurl() + '/_data/login.aspx'
    data = {
        '__VIEWSTATE': '/wEPDwUJNDU0MDQ0MDI5ZGQ=',
        '__EVENTVALIDATION': event_validation,
        'pcInfo': 'Mozilla/5.0+(Windows+NT+10.0;+Win64;+x64;+rv:66.0)+Gecko/20100101+Firefox/66.0Windows+NT+10.0;+Win64;+x645.0+(Windows)+SN:NULL',
        'typeName': '%D1%A7%C9%FA',
        'dsdsdsdsdxcxdfgfg': chuli[0],
        'fgfggfdgtyuuyyuuckjg': chuli[1],
        'Sel_Type': 'STU',
        'txt_asmcdefsddsd': username,
        'txt_pewerwedsdfsdff': '',
        'txt_sdertfgsadscxcadsads': '',
        'txt_mm_expression': chuli[2],
        'txt_mm_length': chuli[3],
        'txt_mm_userzh': chuli[4],
    }
    header = {
        'Host': 'bkjw.sxu.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Referer': 'http://bkjw.sxu.edu.cn/_data/login.aspx',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Content-Length': '526',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    response = requests.post(url=url, headers=header, data=data, cookies=cookies)
    soup = BeautifulSoup(response.text, 'lxml')
    dlstatus = soup.find(id="divLogNote").get_text()
    # The status text contains '稍候' ("please wait") on a successful login.
    return {
        'status': '稍候' in dlstatus,
        'mes': dlstatus,
        'use': {
            'usename': username,
            'password': password
        },
        'cookies': cookies_dict
    }


def check_jwyzm(cookies_dict, username, password, yzm):
    """Validate captcha + credentials against the academic-office login page.

    Fixed: the original body (~40 lines) was duplicated wholesale inside a
    bare ``except:``, the only difference being a hardcoded
    ``__EVENTVALIDATION`` token; both paths now share ``_jw_login_attempt``.

    :param cookies_dict: plain cookie dict from ``jw_yzm``.
    :param username: student account number.
    :param password: account password.
    :param yzm: captcha text entered/recognised for this session.
    :return: dict with ``status``/``mes``/``use``/``cookies``.
    """
    try:
        return _jw_login_attempt(cookies_dict, username, password, yzm,
                                 Base.event())
    except Exception:
        # Fall back to a known-good static token when Base.event() (or the
        # first attempt) fails for any reason, mirroring the original
        # except-branch behaviour.
        return _jw_login_attempt(cookies_dict, username, password, yzm,
                                 '/wEWAgKvk+gMApnB768G')