def test_urlretrieve(self):
        url = urljoin(self.uri, "/mechanize/")
        test_filename = "python.html"

        def check_retrieve(opener, filename, headers):
            self.assertEqual(headers.get('Content-Type'), 'text/html')
            f = open(filename)
            data = f.read()
            f.close()
            opener.close()
            from urllib import urlopen
            r = urlopen(url)
            self.assertEqual(data, r.read())
            r.close()

        opener = mechanize.build_opener()
        verif = CallbackVerifier(self)
        filename, headers = opener.retrieve(url, test_filename, verif.callback)
        try:
            self.assertEqual(filename, test_filename)
            check_retrieve(opener, filename, headers)
            self.assert_(os.path.isfile(filename))
        finally:
            os.remove(filename)

        opener = mechanize.build_opener()
        verif = CallbackVerifier(self)
        filename, headers = opener.retrieve(url, reporthook=verif.callback)
        check_retrieve(opener, filename, headers)
        # closing the opener removed the temporary file
        self.failIf(os.path.isfile(filename))
Example #2
def slurp_with_login_and_pwd():
    import sys
    import mechanize
    # sys.path.append('ClientCookie-1.0.3')
    # from mechanize import ClientCookie
    # sys.path.append('ClientForm-0.1.17')
    # import ClientForm

    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = mechanize.CookieJar()

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    mechanize.install_opener(opener)
    fp = mechanize.urlopen("http://login.yahoo.com")
    forms = mechanize.ParseResponse(fp)
    fp.close()

    # print forms on this page
    for form in forms:
        print "***************************"
        print form

    form = forms[0]
    form["login"] = "******"  # use your userid
    form["passwd"] = "password"  # use your password
    fp = mechanize.urlopen(form.click())
    fp.close()
    fp = mechanize.urlopen(
        "https://class.coursera.org/ml-003/lecture/download.mp4?lecture_id=1"
    )  # use your group
    fp.readlines()
    fp.close()
Example #3
    def retrieve_product_data(self, product_link):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        browser = mechanize.Browser()
        product_data = browser.open(product_link).get_data()
        soup = BeautifulSoup(product_data)

        product_name = soup.find('title').string.encode('ascii', 'ignore')

        product_prices = soup.find('div', 'price').contents

        try:
            cash_price = int(clean_price_string(product_prices[4]))

            product_data = ProductData()
            product_data.custom_name = product_name
            product_data.price = cash_price
            product_data.url = product_link
            product_data.comparison_field = product_link

            return product_data
        except IndexError:
            return None
Example #4
def constructUploadName(loginname, requestedfilename):
    # construct name
    import random
    filename = os.path.split(requestedfilename)[1]
    filename = filename[:string.find(filename, ".")] + ".jpg"  # construct jpg extension
    resultName = string.lower(loginname + "_" + APP_VERSION_STR + "_" + filename)  # prepend loginname
    resultName = string.replace(resultName, " ", "_")  # replace spaces
    resultName = string.replace(resultName, "'", "_")  # replace '
    # resultName = urllib.quote(resultName) #make safe url
    theResult = ""
    for theChar in resultName:
        if theChar in ALLOWED_CHARS:
            theResult += theChar
    resultName = theResult
    # check whether ok
    # build opener. Can be extended to handle cookies/proxies
    opener = mechanize.build_opener()
    # goto upload page
    request3 = mechanize.Request(FORUM_UPLOAD_URL)
    response3 = opener.open(request3)
#    page = string.lower(response3.read())
    response3.close()
    random.seed()
    # the name-collision check against the page is disabled (see the
    # commented-out lines); prepend random digits to make the name unique
    for _i in range(6):
        resultName = str(random.random())[-1] + resultName  # prepend a random digit
    # while string.find(page, resultName) <> -1:
    return resultName
Example #5
def uploadFileToAquaforum(uploadFilename, requestedFileName):
    '''
    returns response page
    '''

    # build opener. Can be extended to handle cookies/proxies
    opener = mechanize.build_opener()
    # goto upload page
    request3 = mechanize.Request(FORUM_UPLOAD_URL)
    response3 = opener.open(request3)

    # parse form on upload page and add file
    forms = mechanize.ParseResponse(response3, backwards_compat=False)
    form = forms[0]
    filectr = form.find_control("imgfile")
    # filectr.add_file(open('/home/jasper/avatar.jpg'),"image/jpeg","avatar.jpg")
    theFile = open(uploadFilename, 'rb')
    filectr.add_file(theFile, "image/jpeg", os.path.split(
        requestedFileName)[-1])
    # obtain form data
    request4 = form.click()  # urllib2.Request object
    theFile.close()
    request4.add_header('Referer', response3.geturl())
    response4 = opener.open(request4)
    return response4.read()
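
A minimal sketch of how the two helpers above could be combined end to end; the login name and local file path are placeholder assumptions, and the module-level constants (FORUM_UPLOAD_URL, APP_VERSION_STR, ALLOWED_CHARS) must be configured as both functions expect:

# Hypothetical usage; the login name and path are placeholders.
local_path = "/tmp/picture.jpg"
remote_name = constructUploadName("someuser", local_path)
result_html = uploadFileToAquaforum(local_path, remote_name)
print result_html[:200]  # start of the forum's response page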
Example #6
    def __init__(self,
                 feed_id,
                 logging,
                 keyfile="xively.key",
                 timeout=5,
                 uptime=False):
        threading.Thread.__init__(self)

        # private key stored in a file
        try:
            api_key = open(keyfile).readlines()[0].strip()
        except IOError:
            raise Xively_Exception("missing api key file: %s" % keyfile)

        self.feed_id = feed_id
        self.timeout = timeout
        self.logger = logging.getLogger('xively')

        self.opener = mechanize.build_opener()
        self.opener.addheaders = [('X-ApiKey', api_key)]
        self.data = []
        self.payload = {}

        if uptime:
            self.add_uptime()
Example #7
	def __init__(self, username="******", password="******"):
		self.username = "******"+username
		self.password = password
		self.password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
		ntlm_auth = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(self.password_manager)
		opener = mechanize.build_opener(ntlm_auth)
		mechanize.install_opener(opener)
    def __init__(self, feed_id, apikey):
        self._version = "1.0.0"
        self._feed_id = feed_id
        self._opener = mechanize.build_opener()
        self._opener.addheaders = [('X-ApiKey', apikey)]
        self._data = []
        self._payload = {}
def readUrl(inUrl):

    response = None
    tryCount = 0
    while tryCount < 5:
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible; MyProgram/0.1)"), ("From", "*****@*****.**")]
        mechanize.install_opener(opener)
        try:
            response = mechanize.urlopen(inUrl)
            break
        except Exception:
            tryCount += 1
            print "******** Error on urlopen ***********"
            print "URL: ", inUrl
            print "Trying Again....", tryCount

    if response is None:
        raise IOError("could not open %s after %d attempts" % (inUrl, tryCount))

    html = response.read()
    root = lxml.html.fromstring(html)

    return root
    def __callRequest(self):
        cookieJar = mechanize.LWPCookieJar()
        try:  # TODO: possibly drop the try
            cookieJar.load(self._cookiePath, self.__bIgnoreDiscard, self.__bIgnoreExpired)
        except Exception as e:
            logger.info(e)
        sParameters = urllib.urlencode(self.__aParameters)

        opener = mechanize.build_opener(SmartRedirectHandler,
                                        mechanize.HTTPEquivProcessor,
                                        mechanize.HTTPRefreshProcessor)
        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for aHeader in self.__aHeaderEntries:
            for sHeaderKey, sHeaderValue in aHeader.items():
                oRequest.add_header(sHeaderKey, sHeaderValue)
        cookieJar.add_cookie_header(oRequest)

        if self.caching and self.cacheTime > 0:
            sContent = self.readCache(self.getRequestUri())
            if sContent:
                return sContent
        try:
            oResponse = opener.open(oRequest, timeout=60)
        except mechanize.HTTPError, e:
            if not self.ignoreErrors:
                xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:', self.__sUrl, str(e))
                logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
                return ''
            else:
                oResponse = e
Example #11
	def _parsingRobotsFile(self, url):
		"""
		Set up internal state after downloading robots.txt.
		If urlopen.code in (401, 403), all user-agents are disallowed. -> to be removed
		If urlopen.code >= 400, all user-agents are allowed.
		"""
		domain_name = urlparse.urlparse(url)[1]
		rules = RobotsTextRules()
		#getLogger().debug("++Trying to download: %s", url)
		
		opener = mechanize.build_opener(mechanize.HTTPRefreshProcessor,)

		rq = mechanize.Request(url)
		rq.add_header("User-agent", "Mozilla/5.0 (compatible; Windows NT 6.1?; ZumBot/1.0; http://help.zum.com/inquiry)")


		#shttp.setRequestHeader(user_agent = USER_AGENT)
		rs = None
		try:
			rs = opener.open(rq)
			header = rs.info()
			rules.return_code = rs.code
		except Exception, msg:
			try:
				if not url.startswith("http://www."):
					t_url = url.replace("http://", "http://www.")
					rq = mechanize.Request(t_url )
					rq.add_header("User-agent", "Mozilla/5.0 (compatible; Windows NT 6.1?; ZumBot/1.0; http://help.zum.com/inquiry)")
					rs = opener.open(rq)
					header = rs.info()
					rules.return_code = rs.code
			except Exception, msg:
				return rules
Example #13
    def init(self):
        br = mechanize.Browser()
        br.set_handle_robots(False)
        
        self.cj = mechanize.LWPCookieJar()
        br.set_cookiejar(self.cj)
        
        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(),
                              max_time=1)
        
        br.open("https://www.tumblr.com/login")
        br.select_form(nr=0)
        
        br['user[email]'] = ""
        br['user[password]'] = ""
        
        url, data, hdrs = br.form.click_request_data()
        br.open("https://www.tumblr.com/login", data)

        self.nf = 0

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(self.cj))
        mechanize.install_opener(opener)
        self._fetch()
Example #14
    def _checkStoredInjections(self):
        for r in self.results:
            # At this state injections in Result obj are not
            # compacted yet so it will only be 1st injected param
            url, data = r.target.getPayloadedUrl(r.first_param, "")
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') == "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, r.target.getAbsoluteUrl())
                continue 
            except URLError, e:
                self._addError(e.reason, r.target.getAbsoluteUrl())
                continue
def customizeUserAgent():
    import mechanize
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    # Pretend to be Chrome to avoid getting the mobile site.
    opener.addheaders = [("User-agent", "Chrome/16.0.912.63")]
    mechanize.install_opener(opener)
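
Once installed, the opener applies to every module-level mechanize call; a minimal sketch with a placeholder URL:

# Hypothetical usage: after customizeUserAgent(), mechanize.urlopen
# sends the Chrome User-Agent header configured above.
customizeUserAgent()
response = mechanize.urlopen("http://example.com/")  # placeholder URL
html = response.read()
response.close()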
Example #16
    def retrieve_product_links(self):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        url_base = 'http://www.globalmac.cl/'

        browser = mechanize.Browser()

        url_extensions = [
            ['Distribuidor-Apple-Chile/MacBook-Air', 'Notebook'],
            ['Distribuidor-Apple-Chile/MacBook-Pro', 'Notebook'],
            ['Hardware-Mac-PC/Discos-Duros-Notebook-SATA-2.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SATA-3.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SSD-SATA-2.5', 'StorageDrive'],
        ]

        product_links = []

        for url_extension, ptype in url_extensions:
            url = url_base + url_extension
            base_data = browser.open(url).get_data()
            soup = BeautifulSoup(base_data)

            for item in soup.findAll('div', 'name'):
                product_links.append([item.find('a')['href'], ptype])

        return product_links
Example #17
    def _performInjections(self, target):
        # Check every parameter 
        for k, v in target.params.iteritems():
            pl = Payload(taint=True)
            url, data = target.getPayloadedUrl(k, pl.payload)
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') == "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, target.getAbsoluteUrl())
                return
            except URLError, e:
                self._addError(e.reason, target.getAbsoluteUrl())
                return
Example #20
    def _retrieve_product(cls, url):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        browser = mechanize.Browser()
        product_data = browser.open(url).get_data()
        soup = BeautifulSoup(product_data)

        product_name = soup.find('h1').string.encode('ascii', 'ignore')
        product_price = soup.find('span', {'id': 'product_price'})
        product_price = Decimal(clean_price_string(product_price.string))

        payment_methods = ['cash', 'deposit', 'wire_transfer']

        additional_data = soup.find('td', 'descr').findAll('h3')

        if not additional_data:
            payment_methods.extend(['debit_card', 'credit_card'])
        elif additional_data[0].string and 'Contado' not in \
                                           additional_data[0].string:
            payment_methods.extend(['debit_card', 'credit_card'])

        prices = {}
        for p in payment_methods:
            prices[p] = product_price

        return [product_name, prices]
Example #23
def themain():
    #browser=mechanize.Browser()
    #browser.open('http://www.baidu.com')
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)
    r = mechanize.urlopen('http://www.baidu.com')
    cj.save('cookie.txt', ignore_discard=True, ignore_expires=True)
def GetHtml(url):
	opener = mechanize.build_opener()
	opener.addheaders = [("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0) Gecko/20100101 Firefox/4.0")]
	mechanize.install_opener(opener)
	request = mechanize.urlopen(url)
	html = request.read()
	request.close()
	return html
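
Usage is then a single call; a sketch with a placeholder URL:

html = GetHtml("http://example.com/")  # placeholder URL
print html[:100]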
Example #26
    def post(self, URL, QueryString, headers=None):

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self.cookieJar))
        if headers:
            opener.addheaders = headers
        qs = urllib.urlencode(QueryString)
        response = opener.open(URL, qs)
        return response
    def addAuthentication(self, url, username, password):
        passman = mechanize.HTTPPasswordMgrWithDefaultRealm()
        passman.add_password(None, url, username, password)
        # create the NTLM authentication handler
        auth_NTLM = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(passman)

        # create and install the opener
        opener = mechanize.build_opener(auth_NTLM)
        return opener
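
A sketch of how the returned opener might be used; the instance name, URL and credentials are placeholder assumptions:

# Hypothetical usage: the returned opener performs NTLM authentication
# transparently on each request.
opener = client.addAuthentication("http://intranet.example/", "user", "secret")
response = opener.open("http://intranet.example/report")
print response.read()[:100]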
Example #28
    def resolve(self, url, cookie_jar, user_agent):
        headers = {'User-agent': user_agent, 'Referer': url}

        try:
            cookie_jar.load(ignore_discard=True)
        except Exception as e:
            logger.info(e)

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(cookie_jar))

        request = mechanize.Request(url)
        for key in headers:
            request.add_header(key, headers[key])

        try:
            response = opener.open(request)
        except mechanize.HTTPError as e:
            response = e

        body = response.read()

        cookie_jar.extract_cookies(response, request)
        cookie_helper.check_cookies(cookie_jar)

        parsed_url = urlparse(url)
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                      parsed_url.netloc)

        params = {}

        try:
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"',
                                           body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"',
                                       body).group(1)

            js = self._extract_js(body)
        except AttributeError:
            # no Cloudflare challenge form in the page (re.search returned None)
            return None

        params["jschl_answer"] = str(js + len(parsed_url.netloc))

        sParameters = urllib.urlencode(params, True)

        request = mechanize.Request("%s?%s" % (submit_url, sParameters))
        for key in headers:
            request.add_header(key, headers[key])

        sleep(5)

        try:
            response = opener.open(request)
        except mechanize.HTTPError as e:
            response = e

        return response
Example #30
    def __init__(self, **kwargs):
        #self._opener = kwargs.get('opener',None)

        self._username = unicode(kwargs.get('user', RedditSession._username))
        self._passwd = unicode(kwargs.get('passwd', RedditSession._password))
        self._cookies = mechanize.CookieJar()
        self._opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self._cookies))

        self._do_login()
    def test_robots(self):
        plain_opener = mechanize.build_opener(mechanize.HTTPRobotRulesProcessor)
        browser = mechanize.Browser()
        for opener in plain_opener, browser:
            r = opener.open(urljoin(self.uri, "robots"))
            self.assertEqual(r.code, 200)
            self.assertRaises(mechanize.RobotExclusionError, opener.open,
                              urljoin(self.uri, "norobots"))
Example #32
def setup_mechanize():
    """
    Set up user agent for all mechanize calls.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    homepage = "http://github.com/aszlig/picfetcher"
    opener.addheaders = [("User-agent", "PicFetcher/0.1.0 (+%s)" % homepage)]
    mechanize.install_opener(opener)
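
After setup_mechanize() runs, the User-Agent applies to every module-level mechanize call; a minimal sketch reusing the homepage URL from above:

setup_mechanize()
response = mechanize.urlopen("http://github.com/aszlig/picfetcher")
print response.info().get("Content-Type")
response.close()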
Example #34
    def test_retrieve_to_named_file(self):
        url = urljoin(self.uri, "/mechanize/")
        test_filename = os.path.join(self.make_temp_dir(), "python.html")
        opener = mechanize.build_opener()
        verif = CallbackVerifier(self)
        filename, headers = opener.retrieve(url, test_filename, verif.callback)
        self.assertEqual(filename, test_filename)
        self._check_retrieve(url, filename, headers)
        self.assert_(os.path.isfile(filename))
Example #35
    def __callRequest(self):
        if self.caching and self.cacheTime > 0:
            sContent = self.readCache(self.getRequestUri())
            if sContent:
                return sContent

        cookieJar = mechanize.LWPCookieJar(filename=self._cookiePath)
        try:  # TODO: possibly drop the try
            cookieJar.load(ignore_discard=self.__bIgnoreDiscard,
                           ignore_expires=self.__bIgnoreExpired)
        except Exception as e:
            logger.info(e)

        sParameters = urllib.urlencode(self.__aParameters, True)

        handlers = [
            SmartRedirectHandler, mechanize.HTTPEquivProcessor,
            mechanize.HTTPRefreshProcessor
        ]
        if sys.version_info >= (2, 7, 9) and sys.version_info < (2, 7, 11):
            handlers.append(newHTTPSHandler)
        opener = mechanize.build_opener(*handlers)
        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for key, value in self.__headerEntries.items():
            oRequest.add_header(key, value)
        cookieJar.add_cookie_header(oRequest)

        user_agent = self.__headerEntries.get(
            'User-Agent',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
        )

        try:
            oResponse = opener.open(oRequest, timeout=self.requestTimeout)
        except mechanize.HTTPError, e:
            if e.code == 503 and e.headers.get("Server") == 'cloudflare-nginx':
                html = e.read()
                oResponse = self.__check_protection(html, user_agent,
                                                    cookieJar)
                if not oResponse:
                    logger.error("Failed to get CF-Cookie for Url: " +
                                 self.__sUrl)
                    return ''
            elif not self.ignoreErrors:
                xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:',
                                    self.__sUrl, str(e))
                logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
                return ''
            else:
                oResponse = e
    def setUp(self):
        TestCase.setUp(self)
        fixture_name = "test_urllib2_localnet_ProxyAuthTests_server"
        self.register_context_manager(fixture_name,
                                      testprogram.ServerCM(self._make_server))
        server = self.get_cached_fixture(fixture_name)

        proxy_url = "http://127.0.0.1:%d" % server.port
        handler = mechanize.ProxyHandler({"http" : proxy_url})
        self.proxy_digest_handler = mechanize.ProxyDigestAuthHandler()
        self.opener = mechanize.build_opener(handler, self.proxy_digest_handler)
Example #38
    def test_retrieve(self):
        # not passing an explicit filename downloads to a temporary file
        # using a Request object instead of a URL works
        url = urljoin(self.uri, "/mechanize/")
        opener = mechanize.build_opener()
        verif = CallbackVerifier(self)
        request = mechanize.Request(url)
        filename, headers = opener.retrieve(request, reporthook=verif.callback)
        self.assertEquals(request.visit, False)
        self._check_retrieve(url, filename, headers)
        opener.close()
        # closing the opener removed the temporary file
        self.failIf(os.path.isfile(filename))
Example #39
def openUrl(url, cookie=None, login=False):
    """
    Opens a given url through mechanize.

    If no cookie (string path) is passed in, or if a cookie path is passed in
    but the login parameter is False (signifying to open the url with the
    cookie saved at the cookie path), the html from the opened url is returned
    as a string.

    If a cookie path is passed in and the login parameter is True, the
    mechanize.Browser object is returned instead, so that the caller can
    perform a yogaglo login through a form submission.

    """
    browser = mechanize.Browser()
    browser.addheaders = [
        ('User-Agent',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:24.0) Gecko/20100101 Firefox/24.0'),
        ('Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Language', 'en-gb,en;q=0.5'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Keep-Alive', '115'),
        ('Connection', 'keep-alive'),
        ('Cache-Control', 'max-age=0'),
    ]

    #Experimental?
    # browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time = 1)
    
    if cookie is not None:
        cj = cookielib.LWPCookieJar()
        browser.set_cookiejar(cj)
        opener = mechanize.build_opener(HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)

        # trying to login, no cookie yet: return the browser so the caller
        # can follow the login url
        if login is True:
            browser.open(url)
            return browser

        # can't set it to expire; no way to read when this particular
        # cookie expires
        cj.load(cookie, ignore_discard=True)

    return browser.open(url).read()
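
A sketch of the three call patterns the docstring above describes; the URLs and cookie path are placeholders:

# 1) No cookie: plain fetch, returns the page HTML as a string.
html = openUrl("http://www.yogaglo.com/")
# 2) Cookie path with login=True: returns the Browser so the caller can
#    submit the login form itself.
browser = openUrl("http://www.yogaglo.com/login", "cookie.txt", login=True)
# 3) Cookie path with login=False: fetch the page using the saved cookie.
html = openUrl("http://www.yogaglo.com/classes", "cookie.txt")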
Example #40
    def __init__(self):
        self.cj = mechanize.LWPCookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self.cj))
        mechanize.install_opener(opener)
        self.br = mechanize.Browser()
        self.br.set_cookiejar(self.cj)
        self.sessionkey = 'None'
        self.br.set_header(
            'User-Agent',
            value='Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0')
        # self.br.set_debug_http(True)
        self.br.set_debug_redirects(True)
Example #41
def initialize_browser():
    """Configurações para contornar os cookies, robots.txt e outros para fingir ser um browser normal."""
    cookiejar = cookielib.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookiejar))
    mechanize.install_opener(opener)
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.set_handle_redirect(True)
    browser.set_cookiejar(cookiejar)
    browser.set_handle_equiv(True)
    browser.set_handle_referer(True)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=2)
    browser.addheaders = [('User-agent', 'Google Chrome')]
    return browser, cookiejar
Example #42
    def setUp(self):
        mechanize._testcase.TestCase.setUp(self)
        self.test_uri = urljoin(self.uri, "test_fixtures")
        self.server = self.get_cached_fixture("server")
        if self.no_proxies:
            old_opener_m = mechanize._opener._opener
            old_opener_u = urllib2._opener
            mechanize.install_opener(mechanize.build_opener(
                mechanize.ProxyHandler(proxies={})))
            urllib2.install_opener(urllib2.build_opener(
                urllib2.ProxyHandler(proxies={})))

            def revert_install():
                mechanize.install_opener(old_opener_m)
                urllib2.install_opener(old_opener_u)

            self.add_teardown(revert_install)
Example #43
    def __init__(self, feed_id, logging, timeout=5, uptime=False):
        threading.Thread.__init__(self)


        self.feed_id = feed_id
        self.timeout = timeout
        self.logger = logging.getLogger(__name__)

        self.opener = mechanize.build_opener()
        self.opener.addheaders = [('X-ApiKey', xively_key)]
        self.data = []
        self.payload = {}

        if uptime:
            self.add_uptime()
Example #44
    def setUp(self):
        mechanize._testcase.TestCase.setUp(self)
        self.test_uri = urljoin(self.uri, "test_fixtures")
        self.server = self.get_cached_fixture("server")
        if self.no_proxies:
            old_opener_m = mechanize._opener._opener
            mechanize.install_opener(
                mechanize.build_opener(mechanize.ProxyHandler(proxies={})))
            install_opener(build_opener(ProxyHandler(proxies={})))

            def revert_install():
                mechanize.install_opener(old_opener_m)
                install_opener(None)

            self.add_teardown(revert_install)
Example #45
def get_trash_zone(address, zip):
    #Make cookie jar.  See wwwsearch.sourceforge.net/mechanize/hints.html
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)

    #Save cookies
    cj.save(
        "/usr/local/django/recyclocity/recyclocity_static/cookies/cookie_jar",
        ignore_discard=True,
        ignore_expires=True)

    #Create a browser
    browser = mechanize.Browser()

    #Fill in form
    browser.open('http://lmt-web.lowermerion.org/cgi-bin/refuse2.plx')
    browser.form = list(browser.forms())[0]
    browser.form['askrecycle'] = address
    browser.form['postcode'] = zip

    #Submit form
    browser.submit()

    #Extract content
    content = browser.response().read()

    #Use pattern match to extract fields
    m = re.search('<b>(Monday|Tuesday|Wednesday|Thursday|Friday)</b>', content)
    if m:
        day, = m.groups()
        #Convert day to number
        day_number = schedule_helpers.get_day_number(day)
    else:
        #Failed
        return

    m = re.search('<b>Zone ([1-4])</b>', content)
    if m:
        zone, = m.groups()
    else:
        #Failed
        return

    #Match for both day and zone
    return day_number, zone
Example #46
    def __init__(self, username, password):
        mechanize.Browser.__init__(self)
        cj = mechanize.LWPCookieJar()
        self.set_cookiejar(cj)
        self.set_handle_equiv(True)
        self.set_handle_redirect(True)
        self.set_handle_referer(True)
        self.set_handle_robots(False)
        self.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        self.open(self.base_url)

        self.username = username
        self.password = password
        self.login()

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)
Example #47
    def __init__(self, feed_id, logging, keyfile="xively.key", timeout=5, uptime=False):
        threading.Thread.__init__(self)

        # private key stored in a file
        api_key = open(keyfile).readlines()[0].strip()

        self.feed_id = feed_id
        self.timeout = timeout
        self.logger = logging.getLogger('xively')

        self.opener = mechanize.build_opener()
        self.opener.addheaders = [('X-ApiKey', api_key)]
        self.data = []
        self.payload = {}

        if uptime:
            self.add_uptime()
Example #48
    def get(self, URL, QueryString=None, headers=None):

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self.cookieJar))
        if headers:
            opener.addheaders = headers

        if QueryString:
            qs = urllib.urlencode(QueryString)
            response = opener.open(URL + '?' + qs)
        else:
            response = opener.open(URL)

        return response
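
The get method above pairs with the post method in Example #26; a sketch assuming both live on one class whose instances keep a mechanize CookieJar in self.cookieJar (client, URLs and parameters are placeholders):

# Hypothetical usage of the cookie-sharing HTTP helper.
response = client.get("http://example.com/search", {"q": "mechanize"})
print response.read()[:100]
response = client.post("http://example.com/login", {"user": "u", "pass": "p"})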
Example #49
def get_trash_zone(address, zip):

    #Make cookie jar.  See wwwsearch.sourceforge.net/mechanize/hints.html
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)

    #Create a browser
    browser = mechanize.Browser()

    #User-Agent (this is cheating, ok?)
    browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]

    #Save cookies
    cj.save(
        "/usr/local/django/recyclocity/recyclocity_static/cookies/cookie_jar",
        ignore_discard=True,
        ignore_expires=True)

    #Fill in form
    #browser.open('http://citymaps.phila.gov/portal/')
    #browser.select_form(name="form1")
    #browser.form['txtSearchAddress'] = address

    #Fill in form
    #browser.open('https://alpha.phila.gov/property/')
    #browser.open('http://www.lowermerion.org/cgi-bin/recycle2.plx/')
    browser.open(
        'http://www.lowermerion.org/services/public-works-department/refuse-and-recycling/how-to-determine-your-recycling-collection-day'
    )
    #browser.form = list(browser.forms())[0]
    #browser.form['askrecycle'] = address
    #browser.form['postcode'] = zip

    #Submit form
    #browser.submit()

    #Extract content
    content = browser.response().read()

    return content
Example #50
    def __callRequest(self):
        cookieJar = mechanize.LWPCookieJar()
        try:  # TODO: possibly drop the try
            cookieJar.load(self._cookiePath, self.__bIgnoreDiscard,
                           self.__bIgnoreExpired)
        except Exception as e:
            logger.info(e)

        sParameters = urllib.urlencode(self.__aParameters, True)

        handlers = [
            SmartRedirectHandler, mechanize.HTTPEquivProcessor,
            mechanize.HTTPRefreshProcessor
        ]
        if sys.version_info >= (2, 7, 9) and sys.version_info < (2, 7, 11):
            handlers.append(newHTTPSHandler)
        opener = mechanize.build_opener(*handlers)
        if (len(sParameters) > 0):
            oRequest = mechanize.Request(self.__sUrl, sParameters)
        else:
            oRequest = mechanize.Request(self.__sUrl)

        for key, value in self.__headerEntries.items():
            oRequest.add_header(key, value)
        cookieJar.add_cookie_header(oRequest)

        if self.caching and self.cacheTime > 0:
            sContent = self.readCache(self.getRequestUri())
            if sContent:
                return sContent

        try:
            oResponse = opener.open(oRequest, timeout=self.requestTimeout)
        except mechanize.HTTPError, e:
            if e.code == 503 and e.headers.get("Server") == 'cloudflare-nginx':
                oResponse, cookieJar = cCFScrape().resolve(
                    oRequest, e, cookieJar)
            elif not self.ignoreErrors:
                xbmcgui.Dialog().ok('xStream', 'Fehler beim Abrufen der Url:',
                                    self.__sUrl, str(e))
                logger.error("HTTPError " + str(e) + " Url: " + self.__sUrl)
                return ''
            else:
                oResponse = e
Example #52
def go():
    '''
    Main procedure of the scraper. Creates a browser, loads the list of tasks and executes them.
    '''
    try:
        # Prepare the browser
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        mechanize.install_opener(opener)
        br = mechanize.Browser()
        br.set_handle_robots(False)
        br.set_handle_refresh(False)
        br.set_handle_referer(False)
        br.open("http://www.infogreffe.fr/infogreffe/process.do")

        # Get the list of tasks
        tasks = load_task_queue()
        if len(tasks) == 0:
            # If there is no task to execute, init/reset the table
            init_task_queue()
            tasks = load_task_queue()

        for task in tasks:
            try:
                # Execute the task
                results = get_companies(br, task['name'], task['dept'])

                # If we hit the soft limit, add more refined searches to the queue
                if results == 100:
                    print "Limit reached for %s in %s, adding new tasks" % (
                        task['name'], task['dept'])
                    expand_task_queue(task['name'], task['dept'])

                # Mark the task as done
                mark_task_done(task['name'], task['dept'], results)
            except Exception as detail:
                # We may get an exception for using too much CPU time.
                print "Exception raised", detail
    except Exception as detail:
        # If we can't open the browser, just skip running the scraper
        print "Failed starting browser ", detail
    def resolve(self, req, error, cookieJar):
        sleep(5)

        useragent = req.headers.get('User-agent')

        body = error.read()
        parsed_url = urlparse(error.url)
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme,
                                                      parsed_url.netloc)

        params = {}

        params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"',
                                       body).group(1)
        params["pass"] = re.search(r'name="pass" value="(.+?)"',
                                   body).group(1)

        js = self._extract_js(body)

        params["jschl_answer"] = str(js + len(parsed_url.netloc))

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(cookieJar))

        sParameters = urllib.urlencode(params, True)

        request = mechanize.Request("%s?%s" % (submit_url, sParameters))
        request.add_header('Referer', error.url)
        request.add_header('User-agent', useragent)

        response = opener.open(request)

        return response, cookieJar
Example #54
    def logIn(self):
        """
        Logs in to private archives using the supplied email and password.
        Stores the cookie so we can continue to get subsequent pages.
        """

        cookieJar = mechanize.CookieJar()

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
        mechanize.install_opener(opener)

        self.message('Logging in to ' + self.list_url)
        fp = mechanize.urlopen(self.list_url)
        forms = ClientForm.ParseResponse(fp, backwards_compat=False)
        fp.close()

        form = forms[0]
        form['username'] = self.username
        form['password'] = self.password
        fp = mechanize.urlopen(form.click())
        fp.close()