コード例 #1
0
ファイル: download.py プロジェクト: amumu/webscraping
 def get_user_agent(self, proxy):
     """Return the user agent bound to `proxy`, assigning one on first use.

     Agents are remembered in the shared `Download.proxy_agents` mapping, so
     repeated calls with the same proxy return the same agent.
     """
     assigned = Download.proxy_agents
     if proxy not in assigned:
         # first time this proxy is seen: pick a random agent and remember it
         fresh_agent = alg.rand_agent()
         assigned[proxy] = fresh_agent
         return fresh_agent
     # proxy was used before, so keep presenting the same agent
     return assigned[proxy]
コード例 #2
0
 def get_user_agent(self, proxy):
     """Return the user agent to use with `proxy`.

     When `self.settings.keep_ip_ua` is set and the proxy already has an
     agent recorded in `Download.proxy_agents`, that agent is returned.
     Otherwise a random agent is chosen, recorded for the proxy, and returned.
     """
     if not (self.settings.keep_ip_ua and proxy in Download.proxy_agents):
         # either agents are not pinned to proxies or this proxy is new:
         # draw a random agent and record it against the proxy
         chosen = alg.rand_agent()
         Download.proxy_agents[proxy] = chosen
         return chosen
     # pinned and already known, so reuse the recorded agent
     return Download.proxy_agents[proxy]
コード例 #3
0
ファイル: webkit.py プロジェクト: yuzi3150/SeatPJ2
    def __init__(self,
                 gui=False,
                 user_agent=None,
                 proxy=None,
                 load_images=True,
                 load_javascript=True,
                 load_java=True,
                 load_plugins=True,
                 timeout=20,
                 delay=5,
                 app=None,
                 use_cache=False):
        """Set up the browser: application object, page, network manager and settings

        gui: whether to show the browser window
        user_agent: the user-agent when downloading content (random when not given)
        proxy: a QNetworkProxy to download through
        load_images: whether to download images
        load_javascript: whether to enable javascript
        load_java: whether to enable java
        load_plugins: whether to enable browser plugins
        timeout: the maximum amount of seconds to wait for a request
        delay: the minimum amount of seconds to wait between requests
        app: QApplication object so that can instantiate multiple browser objects
        use_cache: whether to cache all replies
        """
        # the QApplication has to exist before any other Qt object is created
        self.app = app if app else QApplication(sys.argv)
        super(Browser, self).__init__()

        web_page = WebPage(user_agent if user_agent else alg.rand_agent())
        web_page.setNetworkAccessManager(NetworkAccessManager(proxy, use_cache))
        self.setPage(web_page)
        web_page.networkAccessManager().finished.connect(self.finished)

        # apply the requested toggles in order; developer extras are always on
        web_attributes = (
            (QWebSettings.AutoLoadImages, load_images),
            (QWebSettings.JavascriptEnabled, load_javascript),
            (QWebSettings.JavaEnabled, load_java),
            (QWebSettings.PluginsEnabled, load_plugins),
            (QWebSettings.DeveloperExtrasEnabled, True),
        )
        for attribute, enabled in web_attributes:
            self.settings().setAttribute(attribute, enabled)

        self.timeout = timeout
        self.delay = delay

        if gui:
            # show the window and bring it to the front
            self.showNormal()
            self.raise_()
コード例 #4
0
ファイル: webkit.py プロジェクト: akhdir/price-extraction
 def __init__(self, base_url=None, gui=False, user_agent=None, proxy=None, load_images=False, forbidden_extensions=None, allowed_regex='.*?', timeout=20, delay=5, enable_plugins=False):
     """Create the web view together with its network manager, page and settings.

     base_url: stored on the instance as `base_url`
     gui: whether to show the widget
     user_agent: user agent given to the WebPage; a random one is used when not given
     proxy, forbidden_extensions, allowed_regex: forwarded to NetworkAccessManager
     load_images: whether images are loaded automatically
     timeout, delay: stored on the instance (presumably seconds - confirm against callers)
     enable_plugins: enables both browser plugins and java
     """
     # Qt allows a single QApplication per process, and it must exist before
     # any other Qt object: reuse the running one, create it only when absent
     self.app = QApplication.instance() or QApplication(sys.argv)
     QWebView.__init__(self)
     manager = NetworkAccessManager(proxy, forbidden_extensions, allowed_regex)
     manager.finished.connect(self.finished)
     webpage = WebPage(user_agent or alg.rand_agent())
     webpage.setNetworkAccessManager(manager)
     self.setPage(webpage)
     # start from a placeholder document until real content is loaded
     self.setHtml('<html><head></head><body>No content loaded</body></html>', QUrl('http://localhost'))
     self.timeout = timeout
     self.delay = delay
     self.base_url = base_url
     self.jquery_lib = None
     # enable flash plugin etc.
     self.settings().setAttribute(QWebSettings.PluginsEnabled, enable_plugins)
     self.settings().setAttribute(QWebSettings.JavaEnabled, enable_plugins)
     self.settings().setAttribute(QWebSettings.AutoLoadImages, load_images)
     self.settings().setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
     if gui:
         self.show()
コード例 #5
0
 def __init__(self, base_url=None, gui=False, user_agent=None, proxy=None, load_images=False, forbidden_extensions=None, allowed_regex='.*?', timeout=20, delay=5, enable_plugins=False):
     """Create the web view together with its network manager, page and settings.

     base_url: stored on the instance as `base_url`
     gui: whether to show the widget
     user_agent: user agent given to the WebPage; a random one is used when not given
     proxy, forbidden_extensions, allowed_regex: forwarded to NetworkAccessManager
     load_images: whether images are loaded automatically
     timeout, delay: stored on the instance (presumably seconds - confirm against callers)
     enable_plugins: enables both browser plugins and java
     """
     # Qt allows a single QApplication per process, and it must exist before
     # any other Qt object: reuse the running one, create it only when absent
     self.app = QApplication.instance() or QApplication(sys.argv)
     QWebView.__init__(self)
     manager = NetworkAccessManager(proxy, forbidden_extensions, allowed_regex)
     manager.finished.connect(self.finished)
     webpage = WebPage(user_agent or alg.rand_agent())
     webpage.setNetworkAccessManager(manager)
     self.setPage(webpage)
     # start from a placeholder document until real content is loaded
     self.setHtml('<html><head></head><body>No content loaded</body></html>', QUrl('http://localhost'))
     self.timeout = timeout
     self.delay = delay
     self.base_url = base_url
     self.jquery_lib = None
     # enable flash plugin etc.
     self.settings().setAttribute(QWebSettings.PluginsEnabled, enable_plugins)
     self.settings().setAttribute(QWebSettings.JavaEnabled, enable_plugins)
     self.settings().setAttribute(QWebSettings.AutoLoadImages, load_images)
     self.settings().setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
     if gui:
         self.show()