def process_request(self, request, spider):
    if spider.name in WEBKIT_DOWNLOADER:
        gh = Ghost()
        se = Session(gh, download_images=False)
        se.open(request.url)
        # evaluate() returns (result, resources); the rendered HTML is in result,
        # not in the resources list the original encoded
        result, resources = se.evaluate('document.documentElement.innerHTML')
        spider.webkit_se = se
        renderedBody = str(result).encode('utf8')
        return HtmlResponse(request.url, body=renderedBody)
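# A sketch of the module context this process_request assumes. The imports are
# the standard Ghost.py/Scrapy ones; the WEBKIT_DOWNLOADER list and its spider
# name are placeholders, not values from the original.
from ghost import Ghost, Session
from scrapy.http import HtmlResponse

WEBKIT_DOWNLOADER = ['js_heavy_spider']  # placeholder: spiders rendered via WebKit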
def login_qq():
    global se
    ua_m = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1'
    se = Session(Ghost(), user_agent=ua_m, wait_timeout=30, wait_callback=None,
                 display=True, viewport_size=(375, 553), download_images=True)
    url = 'https://ui.ptlogin2.qq.com/cgi-bin/login?style=38&appid=728041403&s_url=https%3A%2F%2Finfoapp.3g.qq.com%2Fg%2Flogin%2Fproxy.jsp%3FsourceUrl%3Dhttps%25253A%25252F%25252Fportal.3g.qq.com%25252F%25253F_r%25253D0.2646472700205946%252526aid%25253Dindex%252526g_f%25253D1283&target=self&low_login=1&low_login_hour=4321&daid=261&islogin=false&uid=-8794356048489038000'
    se.open(url)
    se.set_field_value('#u', '2873723285')
    se.set_field_value('#p', 'tz1006')
    se.click('#go', expect_loading=True)
def run3():
    gh = Ghost()
    ss = Session(gh, display=True)
    count = 0
    location = 0
    ss.open('https://edition.cnn.com/election/2016/results/exit-polls/arizona/president')
    # wait_timeout is a Session constructor option, not a method; wait for the page instead
    ss.wait_for_page_loaded()
    html3 = ss.content.encode('utf-8')
    pattern = re.compile(r'<td class="exit-poll__cell">', re.M)  # 'patten' in the original
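    # Hypothetical continuation of run3() above -- a sketch, not the original
    # code: count the exit-poll cells matched by the regex (Python 2 assumed,
    # since the str pattern is matched against utf-8 encoded content).
    cells = pattern.findall(html3)
    count = len(cells)
    print(count)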
class YoukuGhostDriver(object):

    def __init__(self, host, port, timeout):
        #url = 'http://111.161.35.198:12210/youku_ghost.html'
        url = 'http://%s:%s/youku_ghost.html' % (host, port)
        self.ghost = Ghost()
        self.session = Session(self.ghost, wait_timeout=timeout, plugins_enabled=True)
        self.session.open(url)

    def parse(self, vid):
        try:
            res = []
            self.session.evaluate('window.getPlayUrl("%s")' % vid)
            success, resources = self.session.wait_for_selector('div[id="ck"]')
            if success:
                ck = self.session.evaluate('document.getElementById("ck").innerHTML')
                res = ck[0]
        except Exception:  # 'except Exception, e' is Python 2-only syntax
            log.app_log.error(traceback.format_exc())
        finally:
            # the original snippet is cut off here; returning the result is the
            # natural completion
            return res
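# Minimal usage sketch for YoukuGhostDriver; the host, port, and video id below
# are placeholders, not values from the original.
driver = YoukuGhostDriver('127.0.0.1', 12210, timeout=60)
play_url = driver.parse('XMTIzNDU2Nzg5MA==')  # placeholder video id
print(play_url)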
# imports this snippet relies on
import random
from ghost import Ghost, Session
from scrapy.http import HtmlResponse


class GhostMiddleware(object):

    def __init__(self):
        ua = random.choice(self.user_agent_list)  # pick a random User-Agent
        self.ghost = Ghost()
        # pass the chosen UA to the session; the original picked one but never used it
        self.se = Session(self.ghost, user_agent=ua, display=False,
                          wait_timeout=60, download_images=False)
        super(GhostMiddleware, self).__init__()

    # Fetch dynamic pages through Ghost instead of Scrapy's default downloader
    def process_request(self, request, spider):
        # self.se.set_proxy(type_='https', host='127.0.0.1', port=1083)  # adjust type_ to the target URL
        self.se.open(request.url)
        print("Visiting: {0}".format(request.url))
        # Hand the rendered page straight back to the spider instead of the downloader
        return HtmlResponse(url=request.url, body=self.se.content,
                            encoding="utf-8", request=request)

    def __del__(self):
        self.ghost.exit()

    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10",
        "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12",
        "Mozilla/5.0 (Windows NT 5.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 SeaMonkey/2.7.1",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.8 (KHTML, like Gecko) Chrome/4.0.302.2 Safari/532.8",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.464.0 Safari/534.3",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
        "Mozilla/5.0 (Macintosh; U; Mac OS X Mach-O; en-US; rv:2.0a) Gecko/20040614 Firefox/3.0.0 ",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.0.3) Gecko/2008092414 Firefox/3.0.3",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1) Gecko/20090624 Firefox/3.5",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.14) Gecko/20110218 AlexaToolbar/alxf-2.0 Firefox/3.6.14",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1"
    ]
# imports this snippet relies on
import re
from ghost import Ghost, Session


class SinaBookSpider(object):
    # initial state
    gh = Ghost()
    ss = Session(gh, display=True)  # display=True makes debugging easier
    total = 1526                    # precomputed total number of records
    count = 0                       # number of records scraped so far
    # track the parse position and the paging state
    location = 0
    click_times = 0

    def run(self):
        """
        Start the crawl.
        :return:
        """
        # open the page
        self.ss.open("http://book.sina.com.cn/excerpt/rwws/")
        # wait until the data has finished loading
        self.ss.wait_for_selector('#subShowContent1_static > div:nth-child(20)')
        self.parselist()
        while self.count < self.total:
            if self.click_times == 0:  # '==', not 'is': identity checks on ints are fragile
                # click "load more"
                self.ss.click('#subShowContent1_loadMore')
                # after each "load more" or page turn, wait for the new items
                self.ss.wait_for_selector('#subShowContent1_static > div:nth-child(21)')
                self.click_times += 1
                self.parselist()
            elif self.click_times == 1:
                self.ss.click('#subShowContent1_loadMore')
                self.ss.wait_for_selector('#subShowContent1_static > div:nth-child(41)')
                self.click_times += 1
                self.parselist()
            elif self.click_times == 2:
                self.ss.click('#subShowContent1_page .pagebox_next a')
                self.ss.sleep(2)
                self.click_times = 0
                self.location = 0
                self.parselist()

    def parselist(self):
        """
        Parse the list page.
        :return:
        """
        html = self.ss.content.encode('utf8')
        # print(html)
        pattern = re.compile(r'<div class="item"><h4><a href="(.*?)" target="_blank">', re.M)
        links = pattern.findall(html)
        for i in range(self.location, len(links)):
            print(links[i])
            self.count += 1
            self.location += 1
        print(self.count)
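# Minimal way to run the spider above.
if __name__ == '__main__':
    spider = SinaBookSpider()
    spider.run()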
#-*- coding:utf-8 -*-
from ghost import Ghost
from ghost import Session
import time

gh = Ghost()
session = Session(gh)  # misspelled 'sessin' in the original

while True:
    try:
        page, resource = session.open("http://abcabc.gq")
        session.wait_for_page_loaded(10000)
    except Exception:
        pass
    #print session.content
    try:
        page, resource = session.open("http://abcabc.gq/test.php")
        session.wait_for_page_loaded(10000)
    except Exception:
        pass
    #print session.content
    try:
        page, resource = session.open("http://mxqabc.gq")
        session.wait_for_page_loaded(10000)
    except Exception:
        pass
    #print session.content
    try:
        page, resource = session.open("http://mxqabc.gq/test.php")
        session.wait_for_page_loaded(10000)
    except Exception:
        pass
    #print session.content
#!
from ghost import Ghost, Session
from datetime import datetime

item_url = 'http://www.supremenewyork.com/shop/accessories/oi6nqp83m/hsyw4g52m'
checkout_url = 'https://www.supremenewyork.com/checkout'
##############################
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
header = {'User-Agent': ua}
gh = Ghost()
se = Session(gh, user_agent=ua, wait_timeout=20, wait_callback=None,
             display=True, viewport_size=(1080, 1680), download_images=True)
##############################
se.open(item_url)
# click the "add to cart" button
se.evaluate("""document.querySelector('input[name="commit"]').click();""")
se.sleep(0.5)
se.open(checkout_url)

# save the checkout page under supreme/YYYYMMDD.html
ISOFORMAT = '%Y%m%d'
today = datetime.today()
filename = today.strftime(ISOFORMAT)
with open('supreme' + '/' + filename + '.html', 'w') as f:
    f.write(se.content)
    ip_list.append((type, ip, port))  # tail of a function cut off above this snippet

##########################################
##########################################
UA = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
header = {'User-Agent': UA,
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
          'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
          'Accept-Encoding': 'gzip, deflate'
          }
s = requests.session()
s.keep_alive = False
gh = Ghost()
se = Session(gh, user_agent=UA, wait_timeout=30, wait_callback=None,
             display=False, viewport_size=(800, 680), download_images=False)

#################################
# hidemy
def get_hidemy():
    url = 'https://hidemy.name/en/proxy-list/?country=US&type=h&anon=4#list'
    se.open(url)
    se.wait_for_selector('table.proxy__t')
    html = se.content
    soup = BeautifulSoup(html, "html.parser")
    sources = soup.select('tbody > tr')
    for i in sources:
        ip_info = hidemy_info(i)
        add_task(ip_info[0], ip_info[1], ip_info[2])

def hidemy_info(source):
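# The snippet above cuts off at the hidemy_info definition. A sketch of what
# such a row parser might look like; the column positions are assumptions about
# the hidemy.name table layout, not taken from the original.
def hidemy_info(source):
    tds = source.select('td')
    ip = tds[0].text.strip()             # assumed: first cell is the IP
    port = tds[1].text.strip()           # assumed: second cell is the port
    type_ = tds[4].text.strip().lower()  # assumed: fifth cell is the protocol
    return (type_, ip, port)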
#!/usr/bin/env python
#coding:utf-8
from ghost import Ghost, Session
import time

if __name__ == '__main__':
    gh = Ghost()
    se = Session(gh, display=True)
    se.open("https://www.baidu.com/")
    se.show()  # the display must be refreshed after changes
    se.fill("#kw", "hello world")
    se.click("#su", btn=0)
    se.show()  # refresh again once the input is done
    time.sleep(10)
ck = 'lastid=1507310046413; mp_mixpanel__c=0; __utma=74692624.319201636.1507274548.1507288133.1507306081.5; __utmc=74692624; __utmz=74692624.1507274548.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _ga=GA1.2.319201636.1507274548; _gid=GA1.2.1580123961.1507274548; tohru=c1ae7f86-7ed0-4cd7-ba85-9c83938b0a1c; cart=1+item--48350%2C17801; _supreme_sess=Z1Q5LzQ3TW5VNlVYbHNSOGo4Qi8xRFlRN0taV0dJbFZuTkUrWUdCWkg0aHVOOXhrY3JmQlRhWXN2dlByY2kwaTlYclg4Tmg5SzhDdGQxb0M3QVB0N05ZSm1zZzZkK0VwUzlsZGpBLzlzQmhsRVpQSzZ0elZnSUljTnEyZmRPWmJDRmFpVW1CTkRaUkZaZVFJU3Y5QVA5disvWE5VZXhsOEtXZ1had2I1SmtNUEVUZGdXOGV0Tjk0YmhWYXFneVFOU2VpbzhGWVpqSnA5dkFxM1JQaDFNOHhIOHczSGgzTDNhaHBaQWlrVkdTUXJTY2wrZ1ZpbUZBcE1BNk9YeXNvcVBrZDNtQ2RRZXdiV1pybFJhc2VIcUczc3pJNlV6T0E1S1RHOG1qOHAyMFZIOERPUG9wMXUzOUdhODUvaGFsSEwzQXphcW91NWhuak9OM0FUSWhUdU5DMFo3SDFzL2ZoT09ac1JGcG9pZXNPcDlKS1hvV1p5N2FJNHdQM1FYODZtL2lmaERmenk5dWtVRjV0QWpYSFBKUHJGTTVqb2NVcWhyNDZqT0ZmNWsrVlVXeDN0KzRaTTlsSGNIZEhaenIvWDdrdmg3TTJqaWtzS0V6NEZpUVRXS3p3Q2xlY2RmWStSTkYwNjVhRXhKOXl2MkJpYlJLQ2liTDNvNkdKd3p1U2orcUIvc3lKVTRmT0c3L3RySUlEWWJYN24zNFlGQ1V4dGgzakQ5VnIrY09GRHI1WmFNek1YeVhNbVpZWmtaNVlhLS1PWXN5Q2ZvSGJVeGVqMnhYMEJBRmdBPT0%3D--26c925e04984f16a492adcb79e6c5f37cfc12697; pure_cart=%7B%2248350%22%3A1%2C%22cookie%22%3A%221%20item--48350%2C17801%22%2C%22total%22%3A%22%24668%22%7D; __utmb=74692624.16.10.1507306081; mp_c5c3c493b693d7f413d219e72ab974b2_mixpanel=%7B%22distinct_id%22%3A%20%2215ef09062b39-0ce82cb3458555-49546c-13c680-15ef09062b46c8%22%2C%22Store%20Location%22%3A%20%22US%20Web%22%2C%22%24initial_referrer%22%3A%20%22%24direct%22%2C%22%24initial_referring_domain%22%3A%20%22%24direct%22%7D; _gat=1; __utmt=1'
##############################
header = {
    'Host': host,
    'User-Agent': ua,
    'Accept': a,
    'Accept-Language': al,
    'Accept_encoding': ae,
    'Referer': re,  # 're' is presumably a referer string defined earlier, shadowing the re module
    'Cookie': ck
}
gh = Ghost()
se = Session(gh, user_agent=ua, wait_timeout=40, wait_callback=None,
             display=True, viewport_size=(800, 600), download_images=True)
##############################

def stock(type):
    global soup
    global stock_items
    global stock_list
    print('\033[1;35mLoading the Website...\033[0m')
    stock_html = requests.get(type, headers=header, verify=False).content
    stock_soup = BeautifulSoup(stock_html, "html.parser")
    stock_items = stock_soup.select('.inner-article')
    stock_list = ['a']
# -*- coding: UTF-8 -*-
# filename: g.py
from ghost import Ghost, Session
from bs4 import BeautifulSoup

ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
ua_m = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1'
header = {'User-Agent': ua_m}  # misspelled 'ua_mo' in the original was undefined
gh = Ghost()
se = Session(gh, user_agent=ua_m, wait_timeout=20, wait_callback=None,
             display=True, viewport_size=(375, 553), download_images=True)

def help():
    print('''
    -----Desktop-----
    header = {\'User-Agent\':ua}
    -----Mobile----- Default
    header = {\'User-Agent\':ua_m}
    -----Size-----
    se = Session(gh, user_agent=ua, wait_timeout=20, wait_callback=None, display=True, viewport_size=(800, 680), download_images=True)
    -----Command-----
    ''')
#!/usr/bin/env python
#coding:utf-8
from ghost import Ghost, Session
import time

gh = Ghost()
se = Session(gh, display=True)
se.open("https://www.baidu.com/")
time.sleep(10)
#coding=utf-8
from ghost import Ghost, Session
import urllib

ghost = Ghost()
#url = "http://index.baidu.com/?tpl=trend&word=%B1%E4%D0%CE%BD%F0%B8%D5"
url = "http://piaofang.maoyan.com/movie/246083?_v_=yes"

###### urllib ######
#def getHtml(url):
#    page = urllib.urlopen(url)
#    html = page.read()
#    return html
#
#html = getHtml(url)
#print html
#print page
#print "---" * 30
#print extra_resources

###### Ghost.py ######
with ghost.start():
    session = Session(ghost)
    session.wait_timeout = 999
    page, resource = session.open(url)
    print(session.content)
    print(page.headers, page.url, page.http_status)
from ghost import Ghost, Session

ghost = Ghost()
USERAGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"

with ghost.start():
    session = Session(ghost, download_images=False, display=True,
                      user_agent=USERAGENT, viewport_size=(800, 600))
    page, rs = session.open("https://m.facebook.com/login.php", timeout=120)
    assert page.http_status == 200
    # fill in the login form fields
    session.evaluate("""
        document.querySelector('input[name="email"]').value = '*****@*****.**';
        document.querySelector('input[name="pass"]').value = 'wikipedia150101facebook';
    """)
    session.evaluate("""document.querySelector('input[name="login"]').click();""",
                     expect_loading=True)
    """
    import codecs
    with codecs.open('fb.html', encoding='utf-8', mode='w') as f:
        f.write(session.content)
    """
    # session.save_cookies('fbookie')
    session.capture_to(path='fbookie.png')
    # gracefully clean off to avoid errors
    session.webview.setHtml('')
    session.exit()
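# Ghost.py can persist login state between runs; a sketch of reusing the cookies
# saved above (save_cookies is commented out in the original; 'fbookie' is the
# file name it would use).
with ghost.start():
    session = Session(ghost, user_agent=USERAGENT)
    session.load_cookies('fbookie')  # counterpart of save_cookies('fbookie')
    page, rs = session.open("https://m.facebook.com/", timeout=120)
    session.exit()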
def load_ghost():
    global se
    gh = Ghost()
    se = Session(gh, user_agent=ua, wait_timeout=20, wait_callback=None,
                 display=True, viewport_size=(800, 680), download_images=True)
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
# filename: trade.py
from ghost import Ghost, Session
from bs4 import BeautifulSoup
import pytesseract
from PIL import Image

gh = Ghost()
se = Session(gh, wait_timeout=30, wait_callback=None, display=True,
             viewport_size=(800, 680), download_images=True)
username = '******'

def login(username, password):
    url = 'https://trade.cgws.com/cgi-bin/user/Login'
    se.open(url)
    # username
    se.set_field_value('#fundAccount', username)
    # password
    #se.show()
    se.fire('#normalpassword', 'focus')
    #se.sleep(0.1)
    html = se.content
    soup = BeautifulSoup(html, "html.parser")
    keys = soup.select('tbody > tr > td')
    key_list = []
    for key in keys:
        key_list.append(key.text)
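# The unused pytesseract/PIL imports suggest an OCR step for the login page's
# image-based input. A sketch of what that might look like; the
# '#verifyCodeImage' selector and 'captcha.png' file name are placeholders, not
# from the original.
def read_captcha():
    se.capture_to('captcha.png', selector='#verifyCodeImage')  # placeholder selector
    text = pytesseract.image_to_string(Image.open('captcha.png'))
    return text.strip()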
def round_trip(DepartCity, ReturnCity, departDate, returnDate, debug=0):
    #global se
    start_time = datetime.now()
    url = 'http://flights.ctrip.com/international/round-%s-%s-%s-%s?%s&%s&y_s' % (
        DepartCity, ReturnCity, code(DepartCity), code(ReturnCity), departDate, returnDate)
    #print(url)
    ctrip_access = False
    while not ctrip_access:
        se = Session(gh, wait_timeout=30, wait_callback=None, display=True,
                     viewport_size=(800, 680), download_images=False)
        se.delete_cookies()
        proxy = choice(proxypool)
        se.set_proxy(proxy[0], proxy[1], int(proxy[2]))
        try:
            se.open(url, user_agent=choice(ua_list))
            #print('opened %s' % url)
        except Exception:
            se.exit()
            del se
            proxypool.remove(proxy)
            blacklist.append(proxy)
            print("blacklist %s" % proxy[1])
            continue
        ctrip_access = se.exists('li:nth-child(5) > span')
        if not ctrip_access:
            se.exit()
            del se
            proxypool.remove(proxy)
            blacklist.append(proxy)
            print("blacklist %s" % proxy[1])
    # sort by price, lowest first
    se.click('#sortControls > ul > li:nth-child(5) > span')
    if se.exists('i.icon-reverse'):
        se.click('#sortControls > ul > li:nth-child(5) > span')
    se.wait_while_selector('#FI_progBar', timeout=20)
    #print('Loading finished!')
    se.sleep(0.2)
    html = se.content
    soup = BeautifulSoup(html, "html.parser")
    source = soup.select('#flightList > div')
    if debug == 1:
        return source
    lowest = source[0].select('span.price2')[0].text
    end_time = datetime.now()
    time_delta = (end_time - start_time).seconds  # misspelled 'timedelsta' in the original
    print('%s-%s round trip: depart %s, return %s, lowest price %s, search took %s seconds'
          % (DepartCity, ReturnCity, departDate, returnDate, lowest, time_delta))
    se.exit()
    del se
    price = lowest[1:]  # strip the currency symbol
    insert_price(DepartCity, ReturnCity, departDate, returnDate, price)
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
# filename: sakai.py
from ghost import Ghost, Session
from bs4 import BeautifulSoup
import sms

gh = Ghost()
se = Session(gh, wait_timeout=30, display=True,
             viewport_size=(375, 553), download_images=True)

def login(username, password):
    index()
    se.set_field_value('#username', username)
    se.set_field_value('#password', password)
    se.click('input.btn-submit', expect_loading=True)

def index():
    url = 'https://sakai.apu.edu/portal/pda/?force.login=yes'
    se.open(url)

def get():
    global assignment_list
    assignment_list = []
from ghost import Ghost, Session
import time

wrapping_div = ''
next_button = ''
initial_url = ''
series_name = ''
chunks = 10

core_session = Ghost()
session = Session(core_session, display=False)

lower_bound = 0
upper_bound = chunks + lower_bound

def save_story(series_name, lower_bound, upper_bound, content):
    file_name = './chap/{0}_{1}-{2}.txt'.format(series_name, lower_bound, upper_bound)
    # drop non-ASCII characters before writing
    content = ''.join([i if ord(i) < 128 else ' ' for i in content])
    print(file_name)
    with open(file_name, 'wt', encoding='utf-8') as file:
        file.write(content)

searching = True
story_buffer = ''
next_url = initial_url
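# The snippet stops right before its main loop. A sketch of the crawl loop these
# variables set up, assuming wrapping_div and next_button hold CSS selectors;
# everything inside the loop is an assumption, not the original code.
chapter = 0
while searching:
    session.open(next_url)
    # pull the chapter text out of the wrapping element
    text, _ = session.evaluate(
        "document.querySelector('%s').innerText" % wrapping_div)
    story_buffer += text or ''
    chapter += 1
    if chapter % chunks == 0:
        save_story(series_name, lower_bound, upper_bound, story_buffer)
        story_buffer = ''
        lower_bound = upper_bound
        upper_bound += chunks
    # follow the "next" link, or stop when there is none
    href, _ = session.evaluate(
        "(document.querySelector('%s') || {}).href" % next_button)
    if href:
        next_url = href
    else:
        searching = False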