Ejemplo n.º 1
0
 def __init__(self):
     """Create a headless Ghost session used to download pages.

     NOTE(review): ``ua`` is picked but never passed to ``Session``,
     so the random User-Agent has no effect in this fragment.
     """
     ua = random.choice(self.user_agent_list)  # randomly pick a User-Agent
     self.ghost = Ghost()
     self.se = Session(self.ghost,
                       display=False,          # no visible browser window
                       wait_timeout=60,        # seconds to wait for loads
                       download_images=False)  # skip images for speed
     super(GhostMiddleware, self).__init__()
Ejemplo n.º 2
0
 def __init__(self, host, port, timeout):
     """Start a Ghost session and open the youku_ghost.html helper page.

     :param host: hostname serving youku_ghost.html
     :param port: port of that server
     :param timeout: Session wait_timeout in seconds
     """
     #url = 'http://111.161.35.198:12210/youku_ghost.html'
     url = 'http://%s:%s/youku_ghost.html' % (host, port)
     self.ghost = Ghost()
     self.session = Session(self.ghost,
                            wait_timeout=timeout,
                            plugins_enabled=True)  # presumably the player page needs browser plugins — confirm
     self.session.open(url)
Ejemplo n.º 3
0
 def process_request(self, request, spider):
     """Render ``request.url`` with Ghost for spiders in WEBKIT_DOWNLOADER.

     Returns an HtmlResponse built from the rendered DOM, or None so
     Scrapy's normal downloader handles the request.
     """
     if spider.name in WEBKIT_DOWNLOADER:
         gh = Ghost()
         se = Session(gh, download_images=False)
         se.open(request.url)
         # Session.evaluate returns (result, resources); the rendered
         # HTML is the *result*.  The original encoded the resources
         # object instead, so the response body was not the page HTML.
         result, resource = se.evaluate(
             'document.documentElement.innerHTML')
         spider.webkit_se = se
         renderedBody = str(result).encode('utf8')
         return HtmlResponse(request.url, body=renderedBody)
Ejemplo n.º 4
0
def login_qq():
    """Open the mobile QQ login page and submit hard-coded credentials.

    Binds the created browser session to the module-level ``se``.
    """
    global se
    mobile_ua = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1'
    session_options = dict(
        user_agent=mobile_ua,
        wait_timeout=30,
        wait_callback=None,
        display=True,
        viewport_size=(375, 553),
        download_images=True,
    )
    se = Session(Ghost(), **session_options)
    login_url = 'https://ui.ptlogin2.qq.com/cgi-bin/login?style=38&appid=728041403&s_url=https%3A%2F%2Finfoapp.3g.qq.com%2Fg%2Flogin%2Fproxy.jsp%3FsourceUrl%3Dhttps%25253A%25252F%25252Fportal.3g.qq.com%25252F%25253F_r%25253D0.2646472700205946%252526aid%25253Dindex%252526g_f%25253D1283&target=self&low_login=1&low_login_hour=4321&daid=261&islogin=false&uid=-8794356048489038000'
    se.open(login_url)
    # Fill the account / password fields, then submit the form.
    se.set_field_value('#u', '2873723285')
    se.set_field_value('#p', 'tz1006')
    se.click('#go', expect_loading=True)
Ejemplo n.º 5
0
def run3():
    """Open the CNN Arizona exit-poll page in a visible Ghost session.

    NOTE(review): ``ss.wait_timeout()`` is called like a method although
    ``wait_timeout`` is normally a Session *setting* — confirm against the
    Ghost API in use.  ``count``/``location``/``html3``/``patten`` are
    assigned but never used in this fragment.
    """
    gh = Ghost()
    ss = Session(gh, display=True)

    count = 0
    location = 0
    ss.open('https://edition.cnn.com/election/2016/results/exit-polls/arizona/president')
    ss.wait_timeout()

    html3 = ss.content.encode('utf-8')
    patten = re.compile(r'<td class="exit-poll__cell">', re.M)
Ejemplo n.º 6
0
class YoukuGhostDriver(object):
    def __init__(self, host, port, timeout):
        """Start a Ghost session and load the youku_ghost.html helper page."""
        #url = 'http://111.161.35.198:12210/youku_ghost.html'
        url = 'http://%s:%s/youku_ghost.html' % (host, port)
        self.ghost = Ghost()
        self.session = Session(self.ghost,
                               wait_timeout=timeout,
                               plugins_enabled=True)  # presumably needed by the player page — confirm
        self.session.open(url)

    def parse(self, vid):
        try:
            res = []
            self.session.evaluate('window.getPlayUrl("%s")' % vid)
            success, resources = self.session.wait_for_selector('div[id="ck"]')
            if success:
                ck = self.session.evaluate(
                    'document.getElementById("ck").innerHTML')
                res = ck[0]

        except Exception, e:
            log.app_log.error(traceback.format_exc())

        finally:
Ejemplo n.º 7
0
class GhostMiddleware(object):
    """Scrapy downloader middleware that renders pages with Ghost.py."""

    def __init__(self):
        ua = random.choice(self.user_agent_list)  # randomly pick a User-Agent
        self.ghost = Ghost()
        # Pass the chosen User-Agent to the Session.  The original picked
        # ``ua`` but never used it, so every request went out with the
        # default agent string (Session accepts user_agent, as the other
        # examples in this file show).
        self.se = Session(self.ghost,
                          user_agent=ua,
                          display=False,
                          wait_timeout=60,
                          download_images=False)
        super(GhostMiddleware, self).__init__()

    # Fetch dynamic pages through Ghost instead of Scrapy's downloader.
    def process_request(self, request, spider):
        # self.se.set_proxy(type_='https', host='127.0.0.1', port=1083) # adjust type_ to the url being visited
        self.se.open(request.url)

        print("访问:{0}".format(request.url))

        # Return the rendered response straight to the spider,
        # bypassing the regular downloader.
        return HtmlResponse(url=request.url,
                            body=self.se.content,
                            encoding="utf-8",
                            request=request)

    def __del__(self):
        # Shut the embedded browser down when the middleware is collected.
        self.ghost.exit()

    # Pool of desktop User-Agent strings sampled in __init__.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10",
        "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like Gecko) Version/5.0.1 Safari/533.17.8",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like Firefox/3.x) SeaMonkey/2.0.12",
        "Mozilla/5.0 (Windows NT 5.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 SeaMonkey/2.7.1",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.8 (KHTML, like Gecko) Chrome/4.0.302.2 Safari/532.8",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.464.0 Safari/534.3",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.54 Safari/535.2",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
        "Mozilla/5.0 (Macintosh; U; Mac OS X Mach-O; en-US; rv:2.0a) Gecko/20040614 Firefox/3.0.0 ",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.0.3) Gecko/2008092414 Firefox/3.0.3",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1) Gecko/20090624 Firefox/3.5",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.14) Gecko/20110218 AlexaToolbar/alxf-2.0 Firefox/3.6.14",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1"
    ]
class SinaBookSpider(object):
    """Crawler for excerpt listings on book.sina.com.cn, driven by Ghost."""

    # shared crawler state
    gh = Ghost()
    ss = Session(gh, display=True)  # display=True makes debugging easier

    total = 1526  # precomputed total number of items to fetch
    count = 0  # items crawled so far

    # parse offset within the current page / number of "load more" clicks
    location = 0
    click_times = 0

    def run(self):
        """Open the listing page and crawl until ``total`` items are seen."""
        self.ss.open("http://book.sina.com.cn/excerpt/rwws/")
        # wait for the first batch of items to finish rendering
        self.ss.wait_for_selector(
            '#subShowContent1_static > div:nth-child(20)')

        self.parselist()

        while self.count < self.total:
            # The original compared with ``is 0`` / ``is 1`` / ``is 2`` —
            # identity tests on ints that only work by CPython's small-int
            # caching; use ``==`` for value comparison.
            if self.click_times == 0:
                # click "load more"
                self.ss.click('#subShowContent1_loadMore')
                # after each click, wait until the new batch has rendered
                self.ss.wait_for_selector(
                    '#subShowContent1_static > div:nth-child(21)')

                self.click_times += 1
                self.parselist()
            elif self.click_times == 1:
                self.ss.click('#subShowContent1_loadMore')
                self.ss.wait_for_selector(
                    '#subShowContent1_static > div:nth-child(41)')

                self.click_times += 1
                self.parselist()
            elif self.click_times == 2:
                # third step: advance to the next page, then reset counters
                self.ss.click('#subShowContent1_page .pagebox_next a')
                self.ss.sleep(2)

                self.click_times = 0
                self.location = 0
                self.parselist()

    def parselist(self):
        """Extract article links from the rendered page and count them."""
        # ``content`` is already text; the original encoded it to UTF-8
        # bytes, which makes ``findall`` with a str pattern raise a
        # TypeError on Python 3.
        html = self.ss.content

        pattern = re.compile(
            r'<div class="item"><h4><a href="(.*?)" target="_blank">', re.M)
        links = pattern.findall(html)

        for i in range(self.location, len(links)):
            # converted from Python 2 print statements to print()
            print(links[i])
            self.count += 1
            self.location += 1
        print(self.count)
Ejemplo n.º 9
0
#-*- coding:utf-8 -*-
from ghost import Ghost
from ghost import Session
import time
gh = Ghost()
sessin = Session(gh)
# Endlessly poll the four URLs below, ignoring every failure.
# NOTE(review): the bare ``except: pass`` swallows ALL errors (including
# KeyboardInterrupt) and the loop never terminates by design.
while True:
    try:
        page , resource = sessin.open("http://abcabc.gq")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page , resource = sessin.open("http://abcabc.gq/test.php")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page , resource = sessin.open("http://mxqabc.gq")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page , resource = sessin.open("http://mxqabc.gq/test.php")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
Ejemplo n.º 10
0
#!
from ghost import Ghost, Session
from datetime import datetime

item_url = 'http://www.supremenewyork.com/shop/accessories/oi6nqp83m/hsyw4g52m'
checkout_url = 'https://www.supremenewyork.com/checkout'
##############################
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
header = {'User-Agent': ua}
gh = Ghost()
se = Session(gh,
             user_agent=ua,
             wait_timeout=20,
             wait_callback=None,
             display=True,
             viewport_size=(1080, 1680),
             download_images=True)
##############################

# Add the item to the cart, then load the checkout page.
se.open(item_url)
se.evaluate("""document.querySelector('input[name="commit"]').click();""")
se.sleep(0.5)
se.open(checkout_url)

# Dump the rendered checkout page to supreme/YYYYMMDD.html.
ISOFORMAT = '%Y%m%d'
today = datetime.today()
filename = today.strftime(ISOFORMAT)
# Use a context manager so the file is closed even if write() raises;
# the original called close() unconditionally and could leak the handle.
with open('supreme' + '/' + filename + '.html', 'w') as f:
    f.write(se.content)
Ejemplo n.º 11
0
            ip_list.append((type, ip, port))

##########################################


##########################################
UA = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
# Desktop-Chrome request headers shared by the proxy-list scrapers.
header = {'User-Agent':UA,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate'
}
s = requests.session()
s.keep_alive = False  # close connections eagerly instead of pooling
gh = Ghost()
# Headless 800x680 Ghost session for JS-rendered proxy listings.
se = Session(gh, user_agent=UA, wait_timeout=30, wait_callback=None, display=False, viewport_size=(800, 680), download_images=False)

#################################
# hidemy
def get_hidemy():
    """Scrape the hidemy.name US/HTTP proxy list and queue each proxy."""
    listing_url = 'https://hidemy.name/en/proxy-list/?country=US&type=h&anon=4#list'
    se.open(listing_url)
    se.wait_for_selector('table.proxy__t')
    page_source = se.content
    parsed = BeautifulSoup(page_source, "html.parser")
    for row in parsed.select('tbody > tr'):
        info = hidemy_info(row)
        add_task(info[0], info[1], info[2])

def hidemy_info(source):
Ejemplo n.º 12
0
#!/usr/bin/env python
#coding:utf-8

from ghost import Ghost, Session
import time
if __name__ == '__main__':
    # Open Baidu in a visible Ghost session, type a query and search.
    gh = Ghost()
    se = Session(gh, display = True)
    se.open("https://www.baidu.com/")
    se.show()#refresh the view after input
    se.fill("#kw","hello world")
    se.click("#su",btn=0) 
    se.show()#refresh the view after input
    time.sleep(10)

Ejemplo n.º 13
0
# Captured Supreme session cookie (includes cart and session state).
ck = 'lastid=1507310046413; mp_mixpanel__c=0; __utma=74692624.319201636.1507274548.1507288133.1507306081.5; __utmc=74692624; __utmz=74692624.1507274548.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); _ga=GA1.2.319201636.1507274548; _gid=GA1.2.1580123961.1507274548; tohru=c1ae7f86-7ed0-4cd7-ba85-9c83938b0a1c; cart=1+item--48350%2C17801; _supreme_sess=Z1Q5LzQ3TW5VNlVYbHNSOGo4Qi8xRFlRN0taV0dJbFZuTkUrWUdCWkg0aHVOOXhrY3JmQlRhWXN2dlByY2kwaTlYclg4Tmg5SzhDdGQxb0M3QVB0N05ZSm1zZzZkK0VwUzlsZGpBLzlzQmhsRVpQSzZ0elZnSUljTnEyZmRPWmJDRmFpVW1CTkRaUkZaZVFJU3Y5QVA5disvWE5VZXhsOEtXZ1had2I1SmtNUEVUZGdXOGV0Tjk0YmhWYXFneVFOU2VpbzhGWVpqSnA5dkFxM1JQaDFNOHhIOHczSGgzTDNhaHBaQWlrVkdTUXJTY2wrZ1ZpbUZBcE1BNk9YeXNvcVBrZDNtQ2RRZXdiV1pybFJhc2VIcUczc3pJNlV6T0E1S1RHOG1qOHAyMFZIOERPUG9wMXUzOUdhODUvaGFsSEwzQXphcW91NWhuak9OM0FUSWhUdU5DMFo3SDFzL2ZoT09ac1JGcG9pZXNPcDlKS1hvV1p5N2FJNHdQM1FYODZtL2lmaERmenk5dWtVRjV0QWpYSFBKUHJGTTVqb2NVcWhyNDZqT0ZmNWsrVlVXeDN0KzRaTTlsSGNIZEhaenIvWDdrdmg3TTJqaWtzS0V6NEZpUVRXS3p3Q2xlY2RmWStSTkYwNjVhRXhKOXl2MkJpYlJLQ2liTDNvNkdKd3p1U2orcUIvc3lKVTRmT0c3L3RySUlEWWJYN24zNFlGQ1V4dGgzakQ5VnIrY09GRHI1WmFNek1YeVhNbVpZWmtaNVlhLS1PWXN5Q2ZvSGJVeGVqMnhYMEJBRmdBPT0%3D--26c925e04984f16a492adcb79e6c5f37cfc12697; pure_cart=%7B%2248350%22%3A1%2C%22cookie%22%3A%221%20item--48350%2C17801%22%2C%22total%22%3A%22%24668%22%7D; __utmb=74692624.16.10.1507306081; mp_c5c3c493b693d7f413d219e72ab974b2_mixpanel=%7B%22distinct_id%22%3A%20%2215ef09062b39-0ce82cb3458555-49546c-13c680-15ef09062b46c8%22%2C%22Store%20Location%22%3A%20%22US%20Web%22%2C%22%24initial_referrer%22%3A%20%22%24direct%22%2C%22%24initial_referring_domain%22%3A%20%22%24direct%22%7D; _gat=1; __utmt=1'
##############################
# NOTE(review): ``host``, ``ua``, ``a``, ``al``, ``ae`` are not defined in
# this fragment, and ``'Referer': re`` would bind the ``re`` *module* if it
# is imported — presumably all of these were variables defined elsewhere;
# verify against the full original script.
header = {
    'Host': host,
    'User-Agent': ua,
    'Accept': a,
    'Accept-Language': al,
    'Accept_encoding': ae,
    'Referer': re,
    'Cookie': ck
}
gh = Ghost()
# Visible 800x600 desktop-profile session for the stock checker below.
se = Session(gh,
             user_agent=ua,
             wait_timeout=40,
             wait_callback=None,
             display=True,
             viewport_size=(800, 600),
             download_images=True)
##############################


def stock(type):
    """Fetch a Supreme shop listing page and collect its item articles.

    NOTE(review): ``type`` shadows the builtin and actually holds a URL;
    ``verify=False`` disables TLS certificate checking.  The fragment ends
    right after seeding ``stock_list`` — the rest of the function is not
    shown here.
    """
    global soup
    global stock_items
    global stock_list
    print('\033[1;35mLoading the Website...\033[0m')
    stock_html = requests.get(type, headers=header, verify=False).content
    stock_soup = BeautifulSoup(stock_html, "html.parser")
    stock_items = stock_soup.select('.inner-article')
    stock_list = ['a']
Ejemplo n.º 14
0
# -*- coding: UTF-8 -*-
# filename: g.py

from ghost import Ghost, Session
from bs4 import BeautifulSoup

# Desktop and mobile User-Agent strings for the Ghost session below.
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14'
ua_m = 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Version/11.0 Mobile/15B150 Safari/604.1'
# Default to the mobile agent.  The original referenced the undefined
# name ``ua_mo`` here, which raises NameError at import time.
header = {'User-Agent': ua_m}

gh = Ghost()

# Mobile-profile session: iPhone User-Agent and a 375x553 viewport.
se = Session(gh,
             user_agent=ua_m,
             wait_timeout=20,
             wait_callback=None,
             display=True,
             viewport_size=(375, 553),
             download_images=True)


def help():
    print('''
    -----Desktop-----
    header = {\'User-Agent\':ua}
    -----Mobile-----
    Default
    header = {\'User-Agent\':ua_m}
    -----Size-----
    se = Session(gh, user_agent=ua, wait_timeout=20, wait_callback=None, display=True, viewport_size=(800, 680), download_images=True)
    -----Command-----
Ejemplo n.º 15
0
#!/usr/bin/env python
#coding:utf-8

from ghost import Ghost, Session
import time
# Open Baidu in a visible Ghost session and keep it up for 10 seconds.
gh = Ghost()
se = Session(gh, display = True)
se.open("https://www.baidu.com/")
time.sleep(10)

Ejemplo n.º 16
0
#!/usr/bin/env python
#coding:utf-8

from ghost import Ghost, Session
import time
if __name__ == '__main__':
    # Open Baidu in a visible Ghost session, type a query and search.
    gh = Ghost()
    se = Session(gh, display=True)
    se.open("https://www.baidu.com/")
    se.show()  #refresh the view after input
    se.fill("#kw", "hello world")
    se.click("#su", btn=0)
    se.show()  #refresh the view after input
    time.sleep(10)
Ejemplo n.º 17
0
#coding=utf-8
from ghost import Ghost,Session
import urllib

ghost = Ghost()

#url = "http://index.baidu.com/?tpl=trend&word=%B1%E4%D0%CE%BD%F0%B8%D5"
url = "http://piaofang.maoyan.com/movie/246083?_v_=yes"

###### urllib ######
#def getHtml(url):
#    page = urllib.urlopen(url)
#    html = page.read()
#    return html
#
#html = getHtml(url)
#print html

#print page
#print "---" * 30
#print extra_resources

###### Ghost.py ######
# Open the Maoyan box-office page and dump the rendered content.
with ghost.start():
    session = Session(ghost)
    session.wait_timeout = 999
    page, resource = session.open(url)
    # Use the print() function — the original Python 2 print statements
    # are a SyntaxError on Python 3, and this file elsewhere uses print().
    print(session.content)
    print(page.headers, page.url, page.http_status)

Ejemplo n.º 18
0
#-*- coding:utf-8 -*-
from ghost import Ghost
from ghost import Session
import time
gh = Ghost()
sessin = Session(gh)
# Endlessly poll the four URLs below, ignoring every failure.
# NOTE(review): bare ``except: pass`` swallows ALL errors and the loop
# never terminates by design.
while True:
    try:
        page, resource = sessin.open("http://abcabc.gq")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page, resource = sessin.open("http://abcabc.gq/test.php")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page, resource = sessin.open("http://mxqabc.gq")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
    #print sessin.content
    try:
        page, resource = sessin.open("http://mxqabc.gq/test.php")
        sessin.wait_for_page_loaded(10000)
    except:
        pass
        #print sessin.content
Ejemplo n.º 19
0
from ghost import Ghost, Session

ghost = Ghost()
USERAGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0"

# Log in to the mobile Facebook site and capture a screenshot.
with ghost.start():
    session = Session(ghost, download_images=False, display=True, user_agent=USERAGENT, viewport_size=(800, 600))
    page, rs = session.open("https://m.facebook.com/login.php", timeout=120)
    assert page.http_status == 200

    # Fill the login form fields via injected JavaScript.
    session.evaluate("""
    document.querySelector('input[name="email"]').value = '*****@*****.**'
    document.querySelector('input[name="pass"]').value = 'wikipedia150101facebook';
    """)

    # Submit and wait for the post-login page to load.
    session.evaluate("""document.querySelector('input[name="login"]').click();""",
                 expect_loading=True)

    """
    import codecs

    with codecs.open('fb.html', encoding='utf-8', mode='w') as f:
       f.write(session.content)
    """

    # session.save_cookies('fbookie')
    session.capture_to(path='fbookie.png')

    # gracefully clean off to avoid errors
    session.webview.setHtml('')
    session.exit()
Ejemplo n.º 20
0
def load_ghost():
  """(Re)create the module-level Ghost session ``se``."""
  global se
  browser = Ghost()
  se = Session(
      browser,
      user_agent=ua,
      wait_timeout=20,
      wait_callback=None,
      display=True,
      viewport_size=(800, 680),
      download_images=True,
  )
Ejemplo n.º 21
0
#!/usr/bin/python3
# -*- coding: UTF-8 -*- 
# filename: trade.py

from ghost import Ghost, Session
from bs4 import BeautifulSoup
import pytesseract
from PIL import Image



gh = Ghost()
# Visible 800x680 session shared by the trading helpers below.
se = Session(gh, wait_timeout=30, wait_callback=None, display=True, viewport_size=(800, 680), download_images=True)

# Placeholder account number (redacted in this example).
username = '******'

def login(username, password):
    """Begin logging in to trade.cgws.com: fill the account number and
    read the on-screen password keypad.

    NOTE(review): the fragment ends while collecting keypad key texts;
    the actual password entry/submit is not shown here.
    """
    url = 'https://trade.cgws.com/cgi-bin/user/Login'
    se.open(url)
    # username
    se.set_field_value('#fundAccount', username)
    # password
    #se.show()
    se.fire('#normalpassword', 'focus')  # focusing triggers the virtual keypad
    #se.sleep(0.1)
    html = se.content
    soup =  BeautifulSoup(html, "html.parser")
    keys = soup.select('tbody > tr > td')
    key_list = []
    for key in keys:
        key_list.append(key.text)
Ejemplo n.º 22
0
def round_trip(DepartCity, ReturnCity, departDate, returnDate, debug=0):
    """Search Ctrip for the cheapest round-trip fare between two cities.

    Rotates through ``proxypool`` until a proxy can load the results page,
    sorts by price (ascending), scrapes the lowest fare and stores it via
    ``insert_price``.  With ``debug=1`` the raw result nodes are returned
    instead of being parsed.
    """
    start_time = datetime.now()
    url = 'http://flights.ctrip.com/international/round-%s-%s-%s-%s?%s&%s&y_s' % (DepartCity, ReturnCity, code(DepartCity), code(ReturnCity), departDate, returnDate)
    ctrip_access = False
    # Keep trying proxies until the results page actually renders.
    while not ctrip_access:
        se = Session(gh, wait_timeout=30, wait_callback=None, display=True, viewport_size=(800, 680), download_images=False)
        se.delete_cookies()
        proxy = choice(proxypool)
        se.set_proxy(proxy[0], proxy[1], int(proxy[2]))
        try:
            se.open(url, user_agent=choice(ua_list))
        except Exception:
            # The proxy failed to load the page: retire it.  (Narrowed
            # from the original bare ``except``, which also caught
            # KeyboardInterrupt/SystemExit.)
            se.exit()
            del se
            proxypool.remove(proxy)
            blacklist.append(proxy)
            print("blacklist %s" % proxy[1])
            continue
        ctrip_access = se.exists('li:nth-child(5) > span')
        if not ctrip_access:
            se.exit()
            del se
            proxypool.remove(proxy)
            blacklist.append(proxy)
            print("blacklist %s" % proxy[1])
    # Sort by price; click again if the sort arrow shows descending order.
    se.click('#sortControls > ul > li:nth-child(5) > span')
    if se.exists('i.icon-reverse'):
        se.click('#sortControls > ul > li:nth-child(5) > span')
    # Wait for the progress bar to disappear, i.e. results fully loaded.
    se.wait_while_selector('#FI_progBar', timeout=20)
    se.sleep(0.2)
    html = se.content
    soup = BeautifulSoup(html, "html.parser")
    source = soup.select('#flightList > div')
    if debug == 1:
        return source
    lowest = source[0].select('span.price2')[0].text
    end_time = datetime.now()
    elapsed = (end_time - start_time).seconds  # original typo: ``timedelsta``
    print('%s-%s往返 %s去 %s回 最低价%s 搜索耗时%s秒' % (DepartCity, ReturnCity, departDate, returnDate, lowest, elapsed))
    se.exit()
    del se
    price = lowest[1:]  # strip the leading currency symbol
    insert_price(DepartCity, ReturnCity, departDate, returnDate, price)
Ejemplo n.º 23
0
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
# filename: sakai.py

from ghost import Ghost, Session
from bs4 import BeautifulSoup
import sms

gh = Ghost()
# Mobile-sized visible session shared by the Sakai helpers below.
se = Session(gh,
             wait_timeout=30,
             display=True,
             viewport_size=(375, 553),
             download_images=True)


def login(username, password):
    """Log in to Sakai: open the PDA login page, fill the form, submit."""
    index()
    se.set_field_value('#username', username)
    se.set_field_value('#password', password)
    se.click('input.btn-submit', expect_loading=True)


def index():
    """Open the Sakai PDA portal with a forced login prompt."""
    login_url = 'https://sakai.apu.edu/portal/pda/?force.login=yes'
    se.open(login_url)


def get():
    global assignment_list
    assignment_list = []
Ejemplo n.º 24
0
from ghost import Ghost, Session
import time

# Scraper configuration; selectors/URLs are filled in per target site.
wrapping_div = ''
next_button = ''
initial_url = ''
series_name = ''

chunks = 10  # chapters per output file

# NOTE(review): Ghost() is bound to ``core_session`` and then passed as the
# first Session argument, so despite its name it is the Ghost application
# object, not a session.
core_session = Ghost()
session = Session(core_session, display=False)

lower_bound = 0
upper_bound = chunks + lower_bound


def save_story(series_name, lower_bound, upper_bound, content):
    """Write *content* to ./chap/<series>_<lo>-<hi>.txt, replacing every
    non-ASCII character with a space first."""
    out_path = './chap/{0}_{1}-{2}.txt'.format(series_name, lower_bound,
                                               upper_bound)
    ascii_only = ''.join(ch if ord(ch) < 128 else ' ' for ch in content)
    print(out_path)
    with open(out_path, 'wt', encoding='utf-8') as out_file:
        out_file.write(ascii_only)


# Scrape-loop state; the loop that consumes it continues beyond this
# fragment.
searching = True
story_buffer = ''

next_url = initial_url