Example No. 1
    def test_cookies(self):
        import urllib2
        # this test page depends on cookies, and an http-equiv refresh
        #cj = CreateBSDDBCookieJar("/home/john/db.db")
        cj = CookieJar()
        handlers = [
            HTTPCookieProcessor(cj),
            HTTPRefreshProcessor(max_time=None, honor_time=False),
            HTTPEquivProcessor(),

            HTTPRedirectHandler(),  # needed for Refresh handling in 2.4.0
#            HTTPHandler(True),
#            HTTPRedirectDebugProcessor(),
#            HTTPResponseDebugProcessor(),
            ]

        o = build_opener(*handlers)
        try:
            install_opener(o)
            try:
                r = urlopen(urljoin(self.uri, "/cgi-bin/cookietest.cgi"))
            except urllib2.URLError, e:
                #print e.read()
                raise
            data = r.read()
            #print data
            self.assert_(
                data.find("Your browser supports cookies!") >= 0)
            self.assert_(len(cj) == 1)

            # test response.seek() (added by HTTPEquivProcessor)
            r.seek(0)
            samedata = r.read()
            r.close()
            self.assert_(samedata == data)
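For contrast with the mechanize-specific handlers above, here is a minimal sketch of the same pattern (a cookie jar wired into a globally installed opener) using only the Python 3 standard library; mechanize's refresh and http-equiv processors have no stdlib equivalent, and the URL is a placeholder:

import urllib.request
from http.cookiejar import CookieJar

cj = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener)  # module-level urlopen() now uses this opener
response = urllib.request.urlopen("http://example.com/cookietest")  # placeholder URL
print(len(cj))  # number of cookies captured during the request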
Example No. 2
    def _retrieve_product(cls, url):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        browser = mechanize.Browser()
        product_data = browser.open(url).get_data()
        soup = BeautifulSoup(product_data)

        product_name = soup.find('h1').string.encode('ascii', 'ignore')
        product_price = soup.find('span', {'id': 'product_price'})
        product_price = Decimal(clean_price_string(product_price.string))

        payment_methods = ['cash', 'deposit', 'wire_transfer']

        additional_data = soup.find('td', 'descr').findAll('h3')

        if not additional_data:
            payment_methods.extend(['debit_card', 'credit_card'])
        elif additional_data[0].string and 'Contado' not in \
                                           additional_data[0].string:
            payment_methods.extend(['debit_card', 'credit_card'])

        prices = {}
        for p in payment_methods:
            prices[p] = product_price

        return [product_name, prices]
Example No. 3
File: slurping.py Project: yz-/ut
def slurp_with_login_and_pwd():
    import sys
    import mechanize
    # sys.path.append('ClientCookie-1.0.3')
    # from mechanize import ClientCookie
    # sys.path.append('ClientForm-0.1.17')
    # import ClientForm

    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = mechanize.CookieJar()

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    mechanize.install_opener(opener)
    fp = mechanize.urlopen("http://login.yahoo.com")
    forms = mechanize.ParseResponse(fp)
    fp.close()

    # print forms on this page
    for form in forms:
        print "***************************"
        print form

    form = forms[0]
    form["login"]  = "******" # use your userid
    form["passwd"] = "password"      # use your password
    fp = mechanize.urlopen(form.click())
    fp.close()
    fp = mechanize.urlopen("https://class.coursera.org/ml-003/lecture/download.mp4?lecture_id=1") # use your group
    fp.readlines()
    fp.close()
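Note that mechanize.ParseResponse is called here without backwards_compat=False; in the ClientForm lineage these examples target, that flag defaults to a deprecated compatibility mode, and later examples on this page (e.g. Example No. 30) disable it explicitly. A hedged sketch of the safer call:

forms = mechanize.ParseResponse(fp, backwards_compat=False)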
Example No. 4
    def _checkStoredInjections(self):
        for r in self.results:
            # At this state injections in Result obj are not
            # compacted yet so it will only be 1st injected param
            url, data = r.target.getPayloadedUrl(r.first_param, "")
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') is "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, r.target.getAbsoluteUrl())
                continue 
            except URLError, e:
                self._addError(e.reason, r.target.getAbsoluteUrl())
                continue
Example No. 5
    def init(self):
        br = mechanize.Browser()
        br.set_handle_robots(False)
        
        self.cj = mechanize.LWPCookieJar()
        br.set_cookiejar(self.cj)
        
        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(),
                              max_time=1)
        
        br.open("https://www.tumblr.com/login")
        br.select_form(nr=0)
        
        br['user[email]'] = ""
        br['user[password]'] = ""
        
        url, data, hdrs = br.form.click_request_data()
        br.open("https://www.tumblr.com/login", data)

        self.nf = 0

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(self.cj))
        mechanize.install_opener(opener)
        self._fetch()
Example No. 6
    def _performInjections(self, target):
        # Check every parameter 
        for k, v in target.params.iteritems():
            pl = Payload(taint=True)
            url, data = target.getPayloadedUrl(k, pl.payload)
            
            # In case of proxy 
            if self.engine.getOption('http-proxy') is not None:
                proxy = ProxyHandler({'http': self.engine.getOption('http-proxy')})
                opener = build_opener(proxy)
                install_opener(opener)
            # Some headers
            if self.engine.getOption('ua') is not None:
                if self.engine.getOption('ua') is "RANDOM":
                    headers = {'User-Agent': random.choice(USER_AGENTS)}
                else:
                    headers = {'User-Agent': self.engine.getOption('ua')}
            else:
                headers = {}
            if self.engine.getOption("cookie") is not None:
                headers["Cookie"] = self.engine.getOption("cookie")

            # Build the request
            req = Request(url, data, headers)
            try:
                to = 10 if self.engine.getOption('http-proxy') is None else 20
                response = urlopen(req, timeout=to)
            except HTTPError, e:
                self._addError(e.code, target.getAbsoluteUrl())
                return
            except URLError, e:
                self._addError(e.reason, target.getAbsoluteUrl())
                return
Example No. 7
def readUrl(inUrl):

    tryCount = 0
    while tryCount < 5 :
#        print "Create CookieJar"
        cookies = mechanize.CookieJar()
#        print "Build Opener"
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
#        print "Add Headers"
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible; MyProgram/0.1)"),("From", "*****@*****.**")]
#        print "Install Opener"
        mechanize.install_opener(opener)
        try:
#            print "Open URL"
            response = mechanize.urlopen(inUrl)
            tryCount = 99
        except:
            tryCount += 1
            print "******** Error on urlopen ***********"
            print "URL: ", inUrl
            print "Trying Again....", tryCount

#    print response.read()
#    html = urllib.urlopen(inUrl).read()
#    print "Reading Response"
    html = response.read()
#    print "Response Read:", html[0:100]
    root = lxml.html.fromstring(html)
#    print "Root created: ", root

    return root
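One hazard in readUrl above: if all five attempts fail, the loop exits with response unbound and response.read() raises NameError. A hedged sketch of a bounded-retry variant (names are illustrative, not from the original):

import mechanize

def open_with_retries(url, attempts=5):
    last_error = None
    for attempt in range(attempts):
        try:
            return mechanize.urlopen(url)
        except Exception, e:  # the original also catches everything
            last_error = e
            print "Error on urlopen (attempt %d): %s" % (attempt + 1, e)
    raise last_error  # all attempts failed; surface the last error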
Example No. 8
    def test_cookies(self):
        import urllib2
        # this test page depends on cookies, and an http-equiv refresh
        #cj = CreateBSDDBCookieJar("/home/john/db.db")
        cj = CookieJar()
        handlers = [
            HTTPCookieProcessor(cj),
            HTTPRefreshProcessor(max_time=None, honor_time=False),
            HTTPEquivProcessor(),
            HTTPRedirectHandler(),  # needed for Refresh handling in 2.4.0
            #            HTTPHandler(True),
            #            HTTPRedirectDebugProcessor(),
            #            HTTPResponseDebugProcessor(),
        ]

        o = build_opener(*handlers)
        try:
            install_opener(o)
            try:
                r = urlopen(urljoin(self.uri, "/cgi-bin/cookietest.cgi"))
            except urllib2.URLError, e:
                #print e.read()
                raise
            data = r.read()
            #print data
            self.assert_(data.find("Your browser supports cookies!") >= 0)
            self.assert_(len(cj) == 1)

            # test response.seek() (added by HTTPEquivProcessor)
            r.seek(0)
            samedata = r.read()
            r.close()
            self.assert_(samedata == data)
Example No. 9
def customizeUserAgent():
    import mechanize
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    # Pretend to be Chrome to avoid getting the mobile site.
    opener.addheaders = [("User-agent", "Chrome/16.0.912.63")]
    mechanize.install_opener(opener)
Example No. 10
def readUrl(inUrl):

    tryCount = 0
    while tryCount < 5:
        #        print "Create CookieJar"
        cookies = mechanize.CookieJar()
        #        print "Build Opener"
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        #        print "Add Headers"
        opener.addheaders = [("User-agent",
                              "Mozilla/5.0 (compatible; MyProgram/0.1)"),
                             ("From", "*****@*****.**")]
        #        print "Install Opener"
        mechanize.install_opener(opener)
        try:
            #            print "Open URL"
            response = mechanize.urlopen(inUrl)
            tryCount = 99
        except:
            tryCount += 1
            print "******** Error on urlopen ***********"
            print "URL: ", inUrl
            print "Trying Again....", tryCount


#    print response.read()
#    html = urllib.urlopen(inUrl).read()
#    print "Reading Response"
    html = response.read()
    #    print "Response Read:", html[0:100]
    root = lxml.html.fromstring(html)
    #    print "Root created: ", root

    return root
Example No. 11
    def retrieve_product_data(self, product_link):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        browser = mechanize.Browser()
        product_data = browser.open(product_link).get_data()
        soup = BeautifulSoup(product_data)

        product_name = soup.find('title').string.encode('ascii', 'ignore')

        product_prices = soup.find('div', 'price').contents

        try:
            cash_price = int(clean_price_string(product_prices[4]))

            product_data = ProductData()
            product_data.custom_name = product_name
            product_data.price = cash_price
            product_data.url = product_link
            product_data.comparison_field = product_link

            return product_data
        except IndexError:
            return None
Example No. 12
    def retrieve_product_links(self):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        url_base = 'http://www.globalmac.cl/'

        browser = mechanize.Browser()

        url_extensions = [
            ['Distribuidor-Apple-Chile/MacBook-Air', 'Notebook'],
            ['Distribuidor-Apple-Chile/MacBook-Pro', 'Notebook'],
            ['Hardware-Mac-PC/Discos-Duros-Notebook-SATA-2.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SATA-3.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SSD-SATA-2.5', 'StorageDrive'],
        ]

        product_links = []

        for url_extension, ptype in url_extensions:
            url = url_base + url_extension
            base_data = browser.open(url).get_data()
            soup = BeautifulSoup(base_data)

            for item in soup.findAll('div', 'name'):
                product_links.append([item.find('a')['href'], ptype])

        return product_links
Example No. 13
def slurp_with_login_and_pwd():
    import sys
    import mechanize
    # sys.path.append('ClientCookie-1.0.3')
    # from mechanize import ClientCookie
    # sys.path.append('ClientForm-0.1.17')
    # import ClientForm

    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = mechanize.CookieJar()

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
    mechanize.install_opener(opener)
    fp = mechanize.urlopen("http://login.yahoo.com")
    forms = mechanize.ParseResponse(fp)
    fp.close()

    # print forms on this page
    for form in forms:
        print "***************************"
        print form

    form = forms[0]
    form["login"] = "******"  # use your userid
    form["passwd"] = "password"  # use your password
    fp = mechanize.urlopen(form.click())
    fp.close()
    fp = mechanize.urlopen(
        "https://class.coursera.org/ml-003/lecture/download.mp4?lecture_id=1"
    )  # use your group
    fp.readlines()
    fp.close()
Example No. 14
    def retrieve_product_data(self, product_link):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        browser = mechanize.Browser()
        product_data = browser.open(product_link).get_data()
        soup = BeautifulSoup(product_data)

        product_name = soup.find('title').string.encode('ascii', 'ignore')

        product_prices = soup.find('div', 'price').contents

        try:
            cash_price = int(clean_price_string(product_prices[4]))

            product_data = ProductData()
            product_data.custom_name = product_name
            product_data.price = cash_price
            product_data.url = product_link
            product_data.comparison_field = product_link

            return product_data
        except IndexError:
            return None
Example No. 15
    def retrieve_product_links(self):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        url_base = 'http://www.globalmac.cl/'

        browser = mechanize.Browser()

        url_extensions = [
            ['Distribuidor-Apple-Chile/MacBook-Air', 'Notebook'],
            ['Distribuidor-Apple-Chile/MacBook-Pro', 'Notebook'],
            ['Hardware-Mac-PC/Discos-Duros-Notebook-SATA-2.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SATA-3.5', 'StorageDrive'],
            ['Hardware-Mac-PC/Discos-Duros-SSD-SATA-2.5', 'StorageDrive'],
            ]

        product_links = []

        for url_extension, ptype in url_extensions:
            url = url_base + url_extension
            base_data = browser.open(url).get_data()
            soup = BeautifulSoup(base_data)

            for item in soup.findAll('div', 'name'):
                product_links.append([item.find('a')['href'], ptype])

        return product_links
Example No. 16
	def __init__(self, username="******", password="******"):
		self.username = "******"+username
		self.password = password
		self.password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
		ntlm_auth = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(self.password_manager)
		opener = mechanize.build_opener(ntlm_auth)
		mechanize.install_opener(opener)
Example No. 17
def GetHtml(url):
	opener = mechanize.build_opener()
	opener.addheaders = [("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0) Gecko/20100101 Firefox/4.0")]
	mechanize.install_opener(opener)
	request = mechanize.urlopen(url)
	html = request.read()
	request.close()
	return html
Example No. 18
def themain():
    #browser=mechanize.Browser()
    #browser.open('http://www.baidu.com')
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)
    r = mechanize.urlopen('http://www.baidu.com')
    cj.save('cookie.txt', ignore_discard=True, ignore_expires=True)
Example No. 19
 def __init__(self, username="******", password="******"):
     self.username = "******" + username
     self.password = password
     self.password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
     ntlm_auth = HTTPNtlmAuthHandler.HTTPNtlmAuthHandler(
         self.password_manager)
     opener = mechanize.build_opener(ntlm_auth)
     mechanize.install_opener(opener)
Example No. 20
def setup_mechanize():
    """
    Set up user agent for all mechanize calls.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    homepage = "http://github.com/aszlig/picfetcher"
    opener.addheaders = [("User-agent", "PicFetcher/0.1.0 (+%s)" % homepage)]
    mechanize.install_opener(opener)
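A hedged usage sketch, assuming setup_mechanize and the mechanize import from above are in scope: after a single call, the module-level mechanize.urlopen carries the custom User-agent (the URL is the project homepage from the headers above, used purely as an illustration):

setup_mechanize()
html = mechanize.urlopen("http://github.com/aszlig/picfetcher").read()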
Example No. 21
def openUrl(url, cookie=None, login=False):
    """
    Opens a given url through mechanize. 

    If there is no cookie (string path) passed in, or if there is a cookie
    path passed in but the login parameter is False (signifying to open the
    url with the cookie saved in the cookie path), the html from the opened
    url is returned as a string.

    If a cookie path is passed in and the login parameter is True, then the
    mechanize.Browser object is returned to perform a yogaglo login through
    a form submission.

    """
    browser = mechanize.Browser()
    browser.addheaders = [
        ('User-Agent',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:24.0) Gecko/20100101 Firefox/24.0'),
        ('Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Language', 'en-gb,en;q=0.5'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Keep-Alive', '115'),
        ('Connection', 'keep-alive'),
        ('Cache-Control', 'max-age=0'),
    ]

    #Experimental?
    # browser.set_handle_gzip(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    if cookie is not None:
        cj = cookielib.LWPCookieJar()
        browser.set_cookiejar(cj)
        opener = mechanize.build_opener(HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)

        # trying to login, no cookie, must return browser so it can follow the
        # login url
        if login is True:
            browser.open(url)
            return browser

        # can't set to expire, can't read when this particular cookie expires
        cj.load(cookie, ignore_discard=True)

    return browser.open(url).read()
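A hedged usage sketch for openUrl; the cookie path and URLs are placeholders, not from the original:

# Log in once through the returned browser, then reuse the saved cookie.
browser = openUrl("https://www.yogaglo.com/login", cookie="yogaglo.cookie", login=True)
# ... submit the login form via `browser` and save cookies to the cookie path ...
html = openUrl("https://www.yogaglo.com/some/class", cookie="yogaglo.cookie")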
Example No. 22
def initialize_browser():
    """Configurações para contornar os cookies, robots.txt e outros para fingir ser um browser normal."""
    cookiejar = cookielib.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookiejar))
    mechanize.install_opener(opener)
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.set_handle_redirect(True)
    browser.set_cookiejar(cookiejar)
    browser.set_handle_equiv(True)
    browser.set_handle_referer(True)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=2)
    browser.addheaders = [('User-agent', 'Google Chrome')]
    return browser, cookiejar  
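A hedged usage sketch, assuming initialize_browser from above is in scope (the URL is a placeholder): the returned browser and jar are already wired together, so the jar can be inspected after each request:

browser, cookiejar = initialize_browser()
response = browser.open("http://example.com")  # placeholder URL
print len(cookiejar)  # cookies captured so far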
Example No. 23
 def __init__(self):
     self.cj = mechanize.LWPCookieJar()
     opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self.cj))
     mechanize.install_opener(opener)
     self.br = mechanize.Browser()
     self.br.set_cookiejar(self.cj)
     self.sessionkey = 'None'
     self.br.set_header(
         'User-Agent',
         value=
         'Mozilla/5.0 (X11; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0'
     )
     # self.br.set_debug_http(True)
     self.br.set_debug_redirects(True)
Example No. 24
 def fillform(self, form, choice,questionid,sessionid,charturl,user,password):
     if choice != "Random":
         for i in range(1, 5):
             form[questionid + str(i)] = [choice]
     else:
         for i in range(1, 5):
             form[questionid + str(i)] = [str(random.randint(1, 5))]
     data = form.click().get_data()
     charturl += sessionid + "&questionid=" + questionid + "&qtype=" + "LS"
     opener = self.addAuthentication(charturl, user, password)
     mechanize.install_opener(opener)
     req = mechanize.Request(charturl, data)
     req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
     req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
     req.add_header('Accept-Encoding', 'gzip,deflate')
Example No. 25
    def setUp(self):
        mechanize._testcase.TestCase.setUp(self)
        self.test_uri = urljoin(self.uri, "test_fixtures")
        self.server = self.get_cached_fixture("server")
        if self.no_proxies:
            old_opener_m = mechanize._opener._opener
            mechanize.install_opener(
                mechanize.build_opener(mechanize.ProxyHandler(proxies={})))
            install_opener(build_opener(ProxyHandler(proxies={})))

            def revert_install():
                mechanize.install_opener(old_opener_m)
                install_opener(None)

            self.add_teardown(revert_install)
Example No. 26
 def setUp(self):
     mechanize._testcase.TestCase.setUp(self)
     self.test_uri = urljoin(self.uri, "test_fixtures")
     self.server = self.get_cached_fixture("server")
     if self.no_proxies:
         old_opener_m = mechanize._opener._opener
         old_opener_u = urllib2._opener
         mechanize.install_opener(mechanize.build_opener(
                 mechanize.ProxyHandler(proxies={})))
         urllib2.install_opener(urllib2.build_opener(
                 urllib2.ProxyHandler(proxies={})))
         def revert_install():
             mechanize.install_opener(old_opener_m)
             urllib2.install_opener(old_opener_u)
         self.add_teardown(revert_install)
Example No. 27
    def __init__(self, username, password):
        mechanize.Browser.__init__(self)
        cj = mechanize.LWPCookieJar()
        self.set_cookiejar(cj)
        self.set_handle_equiv(True)
        self.set_handle_redirect(True)
        self.set_handle_referer(True)
        self.set_handle_robots(False)
        self.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        self.open(self.base_url)

        self.username = username
        self.password = password
        self.login()

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)
Example No. 28
def get_trash_zone(address, zip):
    #Make cookie jar.  See wwwsearch.sourceforge.net/mechanize/hints.html
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)

    #Save cookies
    cj.save(
        "/usr/local/django/recyclocity/recyclocity_static/cookies/cookie_jar",
        ignore_discard=True,
        ignore_expires=True)

    #Create a browser
    browser = mechanize.Browser()

    #Fill in form
    browser.open('http://lmt-web.lowermerion.org/cgi-bin/refuse2.plx')
    browser.form = list(browser.forms())[0]
    browser.form['askrecycle'] = address
    browser.form['postcode'] = zip

    #Submit form
    browser.submit()

    #Extract content
    content = browser.response().read()

    #Use pattern match to extract fields
    m = re.search('<b>(Monday|Tuesday|Wednesday|Thursday|Friday)</b>', content)
    if m:
        day, = m.groups()
        #Convert day to number
        day_number = schedule_helpers.get_day_number(day)
    else:
        #Failed
        return

    m = re.search('<b>Zone ([1-4])</b>', content)
    if m:
        zone, = m.groups()
    else:
        #Failed
        return

    #Match for both day and zone
    return day_number, zone
Example No. 29
def get_trash_zone(address, zip):

    #Make cookie jar.  See wwwsearch.sourceforge.net/mechanize/hints.html
    cj = mechanize.LWPCookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
    mechanize.install_opener(opener)

    #Create a browser
    browser = mechanize.Browser()

    #User-Agent (this is cheating, ok?)
    browser.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]

    #Save cookies
    cj.save(
        "/usr/local/django/recyclocity/recyclocity_static/cookies/cookie_jar",
        ignore_discard=True,
        ignore_expires=True)

    #Fill in form
    #browser.open('http://citymaps.phila.gov/portal/')
    #browser.select_form(name="form1")
    #browser.form['txtSearchAddress'] = address

    #Fill in form
    #browser.open('https://alpha.phila.gov/property/')
    #browser.open('http://www.lowermerion.org/cgi-bin/recycle2.plx/')
    browser.open(
        'http://www.lowermerion.org/services/public-works-department/refuse-and-recycling/how-to-determine-your-recycling-collection-day'
    )
    #browser.form = list(browser.forms())[0]
    #browser.form['askrecycle'] = address
    #browser.form['postcode'] = zip

    #Submit form
    #browser.submit()

    #Extract content
    content = browser.response().read()

    return content
Example No. 30
def acm(query_str):
    acm_url = u"http://dl.acm.org/"
    cookieJar = mechanize.CookieJar()

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    mechanize.install_opener(opener)
    
    fp = mechanize.urlopen(acm_url)
    forms = mechanize.ParseResponse(fp, backwards_compat=False)
    fp.close()
    #doc = fetch(acm_url)
    form = forms[0]
    form['query'] = query_str
    fp = mechanize.urlopen(form.click())
    doc = fp.read()
    with open("acm.html", 'wb') as fo:
        fo.write(doc)
    fp.close()
Example No. 31
    def _product_urls_and_types(cls, product_types):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)
        url_base = 'http://www.globalmac.cl/'

        browser = mechanize.Browser()

        url_extensions = [
            ['MacBook/', 'Notebook'],
            ['MacBook-Pro/', 'Notebook'],
            ['Monitores-LCD/', 'Monitor'],
            ['Cinema-Display/', 'Monitor'],
            ['Disco-Duro-SATA-2.5/', 'StorageDrive'],
            ['Discos-Duros-SATA/', 'StorageDrive'],
        ]

        if 'Ram' in product_types:
            memory_catalog_url = url_base + 'Memorias/'
            base_data = browser.open(memory_catalog_url).get_data()
            soup = BeautifulSoup(base_data)
            subcats = soup.findAll('span', 'subcategories')
            for subcat in subcats:
                link = subcat.find('a')['href'].replace(url_base, '')
                url_extensions.append([link, 'Ram'])

        product_links = []

        for url_extension, ptype in url_extensions:
            if ptype not in product_types:
                continue
            base_data = browser.open(url_base + url_extension).get_data()
            soup = BeautifulSoup(base_data)

            titles = soup.findAll('a', 'product-title')

            for title in titles:
                product_links.append([title['href'], ptype])

        return product_links
Example No. 32
    def __init__(self, username, password):
        mechanize.Browser.__init__(self)
        cj = mechanize.LWPCookieJar()
        self.set_cookiejar(cj)
        self.set_handle_equiv(True)
        self.set_handle_redirect(True)
        self.set_handle_referer(True)
        self.set_handle_robots(False)
        self.addheaders = [(
            'User-agent',
            'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
        )]
        self.open(self.base_url)

        self.username = username
        self.password = password
        self.login()

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)
Example No. 33
def go():
    '''
    Main procedure of the scraper. Creates a browser, loads the list of tasks and executes them
    '''
    try:
        # Prepare the browser
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        mechanize.install_opener(opener)
        br = mechanize.Browser()
        br.set_handle_robots(False)
        br.set_handle_refresh(False)
        br.set_handle_referer(False)
        br.open("http://www.infogreffe.fr/infogreffe/process.do")

        # Get the list of tasks
        tasks = load_task_queue()
        if len(tasks) == 0:
            # If there is no task to execute, init/reset the table
            init_task_queue()
            tasks = load_task_queue()

        for task in tasks:
            try:
                # Execute the task
                results = get_companies(br, task['name'], task['dept'])

                # If we hit the soft limit, add more refined searches to the queue
                if results == 100:
                    print "Limit reached for %s in %s, adding new tasks" % (
                        task['name'], task['dept'])
                    expand_task_queue(task['name'], task['dept'])

                # Mark the task as done
                mark_task_done(task['name'], task['dept'], results)
            except Exception as detail:
                # We may get an exception for using too much CPU time.
                print "Exception raised", detail
    except Exception as detail:
        # If we can't open the browser, just skip running the scraper
        print "Failed starting browser ", detail
Example No. 34
 def fillform(self, form, choice, questionid, sessionid, charturl, user,
              password):
     if choice != "Random":
         for i in range(1, 5):
             form[questionid + str(i)] = [choice]
     else:
         for i in range(1, 5):
             form[questionid + str(i)] = [str(random.randint(1, 5))]
     data = form.click().get_data()
     charturl += sessionid + "&questionid=" + questionid + "&qtype=" + "LS"
     opener = self.addAuthentication(charturl, user, password)
     mechanize.install_opener(opener)
     req = mechanize.Request(charturl, data)
     req.add_header(
         'User-Agent',
         'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
     )
     req.add_header(
         'Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
     req.add_header('Accept-Encoding', 'gzip,deflate')
Example No. 35
    def __init__(self, login=login.facebook):
        super(KaggLoader, self).__init__()

        self.login = login

        self.set_handle_equiv(True)
        self.set_handle_robots(False)

        if not os.path.exists(self.BASE_DIR):
            os.makedirs(self.BASE_DIR)

        if not os.path.exists(self.COOKIE_PATH):
            with open(self.COOKIE_PATH, 'w') as f:
                f.write('#LWP-Cookies-2.0')

        self.cj = mechanize.LWPCookieJar()
        self.cj.load(self.COOKIE_PATH, ignore_discard=False, ignore_expires=False)
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(self.cj))
        mechanize.install_opener(opener)

        self.set_cookiejar(self.cj)
Example No. 36
def pay_me_now(username, password):
	if DEBUG:
		import sys, logging
		logger = logging.getLogger("mechanize")
		logger.addHandler(logging.StreamHandler(sys.stdout))
		logger.setLevel(logging.DEBUG)

	cookies = mechanize.CookieJar()
	opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
	opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible; BTCGPayOut/0.1)")]
	mechanize.install_opener(opener)

	br = mechanize.Browser()
	if DEBUG:
		br.set_debug_http(True)
		br.set_debug_responses(True)
		br.set_debug_redirects(True)

	br.set_handle_robots(False)

	# login
	try:
		br.open(URL_LOGIN)
		br.select_form(predicate=select_login_form)
		br['username'] = username
		br['password'] = password
		br.submit()
		br.select_form(predicate=select_pay_me_now_form)
	except:
		print "Failed to login"
		return

	# logged in
	try:
		br.select_form(predicate=select_pay_me_now_form)
	except:
		print "Failed to find withdraw form"
		return

	br.submit()
Example No. 37
def go():
    '''
    Main procedure of the scraper. Creates a browser, loads the list of tasks and executes them
    '''
    try:
        # Prepare the browser
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        mechanize.install_opener(opener)
        br = mechanize.Browser()
        br.set_handle_robots(False)
        br.set_handle_refresh(False)
        br.set_handle_referer(False)
        br.open("http://www.infogreffe.fr/infogreffe/process.do")
    
        # Get the list of tasks
        tasks = load_task_queue()
        if len(tasks) == 0:
            # If there is no task to execute, init/reset the table
            init_task_queue()
            tasks = load_task_queue()
    
        for task in tasks:
            try:
                # Execute the task
                results = get_companies(br, task['name'], task['dept'])
        
                # If we hit the soft limit, add more refined searches to the queue
                if results == 100:
                    print "Limit reached for %s in %s, adding new tasks" % (task['name'], task['dept'])
                    expand_task_queue(task['name'], task['dept'])
    
                # Mark the task as done
                mark_task_done(task['name'], task['dept'], results)
            except Exception as detail:
                # We may get an exception for using too much CPU time.
                print "Exception raised", detail
    except Exception as detail:
        # If we can't open the browser, just skip running the scraper
        print "Failed starting browser ", detail
Example No. 38
    def logIn(self):
        """
        Logs in to private archives using the supplied email and password.
        Stores the cookie so we can continue to get subsequent pages.
        """
        
        cookieJar = mechanize.CookieJar()

        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
        mechanize.install_opener(opener)
        
        self.message('Logging in to '+self.list_url)
        fp = mechanize.urlopen(self.list_url)
        forms = ClientForm.ParseResponse(fp, backwards_compat=False)
        fp.close()

        form = forms[0]
        form['username'] = self.username
        form['password'] = self.password
        fp = mechanize.urlopen(form.click())
        fp.close()
Example No. 39
    def logIn(self):
        """
        Logs in to private archives using the supplied email and password.
        Stores the cookie so we can continue to get subsequent pages.
        """

        cookieJar = mechanize.CookieJar()

        opener = mechanize.build_opener(
            mechanize.HTTPCookieProcessor(cookieJar))
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible)")]
        mechanize.install_opener(opener)

        self.message('Logging in to ' + self.list_url)
        fp = mechanize.urlopen(self.list_url)
        forms = ClientForm.ParseResponse(fp, backwards_compat=False)
        fp.close()

        form = forms[0]
        form['username'] = self.username
        form['password'] = self.password
        fp = mechanize.urlopen(form.click())
        fp.close()
Example No. 40
    def run(self):
      self.tsession = Session()
      try:
        while True:
          twitter_user_id = self.queue.get()
          twitter_user = self.tsession.query(TwitterUser).filter(TwitterUser.id == twitter_user_id).first()

          # Mechanize setup
          cookies = cookielib.CookieJar()
          opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
          opener.addheaders = [("User-agent",
                                "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/533.4 (KHTML, like Gecko) Version/4.1 Safari/533.4")
            , ]
          mechanize.install_opener(opener)

          # Load the twitter page
          page = None
          try:
            tries = 5
            while tries > 0:
              try:
                page = mechanize.urlopen("https://mobile.twitter.com/" + twitter_user.twitter_screen_name)
                break  # success; without a break the loop would retry forever
              except mechanize.HTTPError, e:
                if e.code != 404:
                  tries = tries - 1
                else:
                  raise
              except Exception, e:
                raise
            if tries == 0:
              print "Error 403 para %s" % twitter_user.twitter_screen_name
              sys.stdout.write("Error 403 para " + twitter_user.twitter_screen_name + "\n")
              sys.stdout.flush()
              self.queue.task_done()
              continue
Example No. 41
    def getformpage(self, user, password, choice):
        projectgroupurl = "http://leo.rp.edu.sg//workspace/studentModule.asp?site="
        qnnurl = "http://leo3.rp.edu.sg//projectweb/group_evaluation.asp?"
        quizurl = "http://leo3.rp.edu.sg//projectweb/qnn_take.asp?"
        sessionurl = "http://leo3.rp.edu.sg//projectweb/qnn_preset.asp?"
        charturl = "http://leo3.rp.edu.sg//projectweb/response_chart.asp?"
        saveurl = "http://leo3.rp.edu.sg//projectweb/qnn_save_responses.asp"
        urllist = []
        for i in range(1, 4):
            urllist.append(projectgroupurl + str(i))

        # retrieve the result
        currentModule = "projectid"
        currentProblem = "groupid"
        try:
            for url in urllist:
                opener = self.addAuthentication(url, user, password)
                mechanize.install_opener(opener)
                response = mechanize.Request(url)
                page = urlopen(response).read()
                if ("Wrong Password" in page or "Wrong ID" in page):
                    self.vNP.set("Sorry, USERNAME or PASSWORD wrong!")
                elif ('''ToggleDisplay''' in page):
                    currentModule = self.getidlist("projectid", page)[-1]
                    currentProblem = self.getidlist("groupid", page)[-1]
            if (currentModule != "projectid" and currentProblem != "groupid"):
                getqnnurl = qnnurl + currentModule + "&" + currentProblem + "&lang=ISO-8859-1"
                opener = self.addAuthentication(getqnnurl, user, password)
                mechanize.install_opener(opener)
                response = mechanize.Request(getqnnurl)
                getqnnpage = urlopen(response)
                forms = ParseResponse(getqnnpage, backwards_compat=False)
                form = forms[0]
                qnnid = form["qnnid"]
                evalid = form["evalid"]
                opener = self.addAuthentication(getqnnurl, user, password)
                mechanize.install_opener(opener)
                response = mechanize.Request(getqnnurl)
                getqnnpageread = urlopen(response).read()
                author_evaluatorlist = re.findall(r"'\d{5}', '.{38}'",
                                                  getqnnpageread)
                #for i in range(len(author_evaluatorlist)):
                authorid = author_evaluatorlist[0][1:6]
                evaluatorid = author_evaluatorlist[0][10:-1]
                getsessionurl = sessionurl + "&qnnid=" + qnnid + "&" + currentModule + "&" + currentProblem + "&evalid=" + evalid + "&evaltype=P" + "&authorid=" + authorid + "&evaluatorid=" + evaluatorid + "&lang=ISO-8859-1"
                opener = self.addAuthentication(getsessionurl, user, password)
                mechanize.install_opener(opener)
                response = mechanize.Request(getsessionurl)
                getqnnpage = urlopen(response)
                forms = ParseResponse(getqnnpage, backwards_compat=False)
                form = forms[0]
                form.set_all_readonly(False)
                form["qnnid"] = qnnid
                form["authorid"] = authorid
                form["evaluatorid"] = evaluatorid
                form["evaltype"] = "P"
                form["lang"] = "ISO-8859-1"
                form["newflag"] = "0"
                form["evalid"] = evalid
                form["groupid"] = currentProblem[8:]
                form["projectid"] = currentModule[10:]
                submit = form.click()
                data = submit.get_data()
                opener = self.addAuthentication(quizurl, user, password)
                mechanize.install_opener(opener)
                response = mechanize.Request(quizurl, data)
                sessionid = self.getidlist("sessionid",
                                           urlopen(response).read())[0]
                answerurl = re.search(
                    "(\<FRAME NAME=\"main\" SRC=\")(.+)(\"\>)",
                    urlopen(response).read()).group(2)
                answerurl = "http://leo3.rp.edu.sg//projectweb/" + answerurl
                opener = self.addAuthentication(answerurl, user, password)
                mechanize.install_opener(opener)
                rs = mechanize.Request(answerurl, data)
                quiz = urlopen(rs)
                quizpage = urlopen(rs).read()
                questionid = re.search(r"\{.+\}num", quizpage).group()[0:-3]
                forms = ParseResponse(quiz, backwards_compat=False)
                form = forms[0]
                self.fillform(form, choice, questionid, sessionid, charturl,
                              user, password)
                form.set_all_readonly(False)
                form["finish"] = "MANUAL"
                print form
                '''
                data = form.click().get_data()
                opener = self.addAuthentication(saveurl, user, password)
                mechanize.install_opener(opener)
                req = mechanize.Request(saveurl, data)
                req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
                req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
                req.add_header('Accept-Encoding', 'gzip,deflate')
                print urlopen(req).read()
                '''
            else:
                self.vNP.set("Sorry, TODAY NO MODULE!")
        except mechanize.HTTPError, e:
            self.vNP.set(
                "Error:",
                BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code])
Example No. 42
 def revert_install():
     mechanize.install_opener(old_opener_m)
     install_opener(None)
Example No. 43
import mechanize

cookies = mechanize.CookieJar()

cookie_opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))

mechanize.install_opener(cookie_opener)

url = "http://www.webscantest.com/crosstraining/aboutyou.php"

res = mechanize.urlopen(url)
content = res.read()
print len(content), content[0:100]
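Since mechanize's CookieJar mirrors cookielib.CookieJar and is iterable, the jar built above can be inspected directly after the request; a minimal sketch using the names from this example:

for cookie in cookies:
    print cookie.name, cookie.value, cookie.domain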
Example No. 44
    def __init__(self, ip):
        self.ip = ip
        self.neighbours = {}
        self.laser_ports = {}
        self.new_adm = False
        #print(self.ip, type(self.ip))
        self.baseurl = 'http://%s:20080/' % (self.ip)

        try:
            br = mechanize.Browser()  #Create mechanize browser object
            #Added false headers
            try:
                cookies = mechanize.CookieJar()
                opener = mechanize.build_opener(
                    mechanize.HTTPCookieProcessor(cookies))
                opener.addheaders = [(
                    "User-agent",
                    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
                )]
                mechanize.install_opener(opener)
            except Exception as e:
                print(str(e))

            try:
                if 'TJ1400' in br.open(self.baseurl, timeout=5.0).read():
                    self.new_adm = True
                br.form = list(br.forms())[0]
                controls = list(br.form.controls)
                controls[0].value = 'tejas'
                controls[1].value = 'j72e#05t'
                page = br.submit()
                self.new_adm = True
                time.sleep(5)
                page = br.open(self.baseurl, timeout=5.0).read()
            except Exception as e:
                #print("{}-{}".format(str(e), self.ip))
                br = mechanize.Browser()
                br.add_password(
                    self.baseurl, username, passw
                )  #Get user id and password from command line arguements
                page = br.open(self.baseurl,
                               timeout=5.0).read()  #Check if NE is accessible
                self.new_adm = False
            if 'alarmBanner' in page:
                print "Logged in to %s" % (self.baseurl)

            loggedIn = self.get_laser_data(br)  #Read laser data of STM ports
            failTime = threading.Thread(target=self.get_fail_time, args=(br, ))
            failTime.start()
            #self.get_fail_time(br)                                                 #Read alarams (MS DCC Fail only)

            addNeighbours = threading.Thread(target=self.add_neighbours,
                                             args=(br, ))
            addNeighbours.start()
            #self.add_neighbours(br)                                                #Add neighbours

            if loggedIn:
                self.backup(br)  #Backup cross-connect info
            failTime.join()
            addNeighbours.join()
            #print(self.neighbours)
            if self.alarams_dict:
                for stm in self.alarams_dict.keys():
                    if stm in self.neighbours.keys():
                        fail_node_times = [[
                            self.ip, self.neighbours[stm][0],
                            self.alarams_dict[stm]
                        ]]
                        fail_times.extend(fail_node_times)

        except Exception as e:
            print("\nError reading {} \n-+--+- {} -+--+-".format(ip, str(e)))
        br.close()
        return (None)
Example No. 45
            except urllib2.URLError, e:
                #print e.read()
                raise
            data = r.read()
            #print data
            self.assert_(data.find("Your browser supports cookies!") >= 0)
            self.assert_(len(cj) == 1)

            # test response.seek() (added by HTTPEquivProcessor)
            r.seek(0)
            samedata = r.read()
            r.close()
            self.assert_(samedata == data)
        finally:
            o.close()
            install_opener(None)

    def test_robots(self):
        plain_opener = mechanize.build_opener(
            mechanize.HTTPRobotRulesProcessor)
        browser = mechanize.Browser()
        for opener in plain_opener, browser:
            r = opener.open(urljoin(self.uri, "robots"))
            self.assertEqual(r.code, 200)
            self.assertRaises(mechanize.RobotExclusionError, opener.open,
                              urljoin(self.uri, "norobots"))

    def test_urlretrieve(self):
        url = urljoin(self.uri, "/mechanize/")
        test_filename = "python.html"
Example No. 46
import mechanize

cookies = mechanize.CookieJar()

cookie_opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))

mechanize.install_opener(cookie_opener) 

url = "http://www.webscantest.com/crosstraining/aboutyou.php"


res = mechanize.urlopen(url)
content = res.read()
Example No. 47
    def login(self,className):
        """
        Automatically generate a cookie file for the coursera site.
        """
        #TODO: use proxy here
        hn,fn = tempfile.mkstemp()
        cookies = cookielib.LWPCookieJar()
        handlers = [
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(cookies)
        ]
        opener = urllib2.build_opener(*handlers)

        url = self.lecture_url_from_name(className)
        req = urllib2.Request(url)

        try:
            res = opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise Exception("Unknown class %s" % className)

        # get the csrf token
        csrfcookie = [c for c in cookies if c.name == "csrf_token"]
        if not csrfcookie: raise Exception("Failed to find csrf cookie")
        csrftoken = csrfcookie[0].value

        opener.close()

        # call the authenticator url:
        cj = cookielib.MozillaCookieJar(fn)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                    urllib2.HTTPHandler(),
                                    urllib2.HTTPSHandler())

        opener.addheaders.append(('Cookie', 'csrftoken=%s' % csrftoken))
        opener.addheaders.append(('Referer', 'https://www.coursera.org'))
        opener.addheaders.append(('X-CSRFToken', csrftoken))
        req = urllib2.Request(self.LOGIN_URL)

        data = urllib.urlencode({'email_address': self.username,'password': self.password})
        req.add_data(data)

        try:
            opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 401:
                raise Exception("Invalid username or password")

        # check if we managed to login
        sessionid = [c.name for c in cj if c.name == "sessionid"]
        if not sessionid:
            raise Exception("Failed to authenticate as %s" % self.username)

        # all should be ok now, mechanize can handle the rest if we give it the
        # cookies
        br = mechanize.Browser()
        #br.set_debug_http(True)
        #br.set_debug_responses(False)
        #br.set_debug_redirects(True)
        br.set_handle_robots(False)
        br.set_cookiejar(cj)

        if self.proxy:
            br.set_proxies({"http":self.proxy})

        self.browser = br

        # also use this cookiejar for other mechanize operations (e.g., urlopen)
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)
Example No. 48
 def getNewToken(self):
     import mechanize #@UnresolvedImport
     br = mechanize.Browser()
     __addon__ = xbmcaddon.Addon(id='script.facebook.media')
     cookiesPath = os.path.join(xbmc.translatePath(__addon__.getAddonInfo('profile')),'cache','cookies')
     LOG('Cookies will be saved to: ' + cookiesPath)
     cookies = mechanize.LWPCookieJar(cookiesPath)
     if os.path.exists(cookiesPath): cookies.load()
     self.cookieJar = cookies
     opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
     mechanize.install_opener(opener)
     br.set_cookiejar(self.cookieJar)
     br._ua_handlers["_cookies"].cookiejar.clear()
     br.set_handle_robots(False)
     agent = 'XBMC/{0} Facebook-Media/{1}'.format(xbmc.getInfoLabel('System.BuildVersion'),self.version)
     LOG('Setting User Agent: {0}'.format(agent))
     br.addheaders = [('User-agent',agent)]
     scope = ''
     if self.scope: scope = '&scope=' + self.scope
     url =     'https://www.facebook.com/dialog/oauth?client_id='+self.client_id+\
             '&redirect_uri='+self.redirect+\
             '&type=user_agent&display=popup'+scope
     LOG(url)
     try:
         res = br.open(url)
         html = res.read()
     except:
         LOG("ERROR: TOKEN PAGE INITIAL READ")
         raise
     
     script = False
     try:
         #check for login form
         br.select_form(nr=0)
         LOG("HTML")
     except:
         self.genericError()
         script = True
         LOG("SCRIPT")
         
     if script:
         #no form, maybe we're logged in and the token is in javascript on the page
         url = res.geturl()
         token = self.extractTokenFromURL(url)
         if not token: token = self.parseTokenFromScript(html)
     else:
         try:
             #fill out the form and submit
             br['email'] = self.login_email
             br['pass'] = self.login_pass
             res = br.submit()
             url = res.geturl()
             LOG("FORM")
         except:
             LOG("FORM ERROR")
             raise
             
         script = False
         token = self.extractTokenFromURL(url)
         html = self.browserRead(res,'-noscript')
         if not token:
             #if 'class="checkpoint"' in html:
             token = self.handleLoginNotificationCrap(br)
             
         if not token: script = True
         
         if script:
             LOG("SCRIPT TOKEN")
             #no token in the url, let's try to parse it from javascript on the page
             try:
                 __addon__ = xbmcaddon.Addon(id='script.facebook.media')
                 htmlFile = os.path.join(xbmc.translatePath(__addon__.getAddonInfo('profile')),'cache','DEBUG_HTML.html')
                 open(htmlFile,'w').write(html)
                 LOG('html output written to: ' + htmlFile)
             except:
                 pass
             token = self.parseTokenFromScript(html)
             token = urllib.unquote(token.decode('unicode-escape'))
     
     if not self.tokenIsValid(token):
         #if script: LOG("HTML:" + html)
         return False
     LOG("\n|--------------------\n|TOKEN: %s\n|--------------------"  % token)
     self.saveToken(token)
     if self.cookieJar is not None:
         self.cookieJar.save()
     return token
Example No. 49
    def _product_urls_and_types(cls, product_types):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
                             ('From', '*****@*****.**')]
        mechanize.install_opener(opener)

        url_buscar_productos = '/cl/'
        product_links = []
        url_base = 'http://www.dell.com'

        # Start home
        if 'Notebook' in product_types:
            url_extensions = [
                'p/laptops?cat=laptops',
            ]

            for url_extension in url_extensions:
                url_webpage = url_base + url_buscar_productos + url_extension

                r = mechanize.urlopen(url_webpage)
                soup = BeautifulSoup(r.read())

                notebook_lines_container = soup.find('div',
                                                     'tabschegoryGroups')
                notebook_lines = \
                    notebook_lines_container.findAll('div', recursive=False)

                notebook_urls = []
                for line in notebook_lines:
                    for container in line.findAll('div', 'prodImg'):
                        link = container.find('a')['href'].replace('pd', 'fs')
                        notebook_urls.append(url_base + link)

                for notebook_url in notebook_urls:
                    for url in cls.retrieve_line_links(notebook_url):
                        product_links.append([url, 'Notebook'])

            # Start business

            url_extensions = [
                'empresas/p/laptops',
            ]

            for url_extension in url_extensions:
                url_webpage = url_base + url_buscar_productos + url_extension
                r = mechanize.urlopen(url_webpage)
                soup = BeautifulSoup(r.read())

                line_links = soup.find('div', 'content').findAll('a')
                for link in line_links:
                    line_url = url_base + link['href']
                    for url in cls.retrieve_enteprise_links(line_url):
                        product_links.append([url, 'Notebook'])

        # Start Monitor

        if 'Monitor' in product_types:
            url_extensions = [
                '/content/products/compare.aspx/19_22widescreen'
                '?c=cl&cs=cldhs1&l=es&s=dhs',
                '/content/products/compare.aspx/23_30widescreen'
                '?c=cl&cs=cldhs1&l=es&s=dhs',
                '/cl/es/empresas/Monitores/19_15widescreen/cp.aspx'
                '?refid=19_15widescreen&s=bsd&cs=clbsdt1',
                '/cl/es/empresas/Monitores/22_20widescreen/cp.aspx'
                '?refid=22_20widescreen&s=bsd&cs=clbsdt1',
                '/cl/es/empresas/Monitores/30_24widescreen/cp.aspx'
                '?refid=30_24widescreen&s=bsd&cs=clbsdt1',
                '/cl/es/empresas/Monitores/20_19flatpanel/cp.aspx'
                '?refid=20_19flatpanel&s=bsd&cs=clbsdt1',
            ]

            for url_extension in url_extensions:
                url_webpage = url_base + url_extension

                r = mechanize.urlopen(url_webpage)
                soup = BeautifulSoup(r.read())

                links = soup.findAll('a', 'lnk')
                for link in links:
                    if 'configure' in link['href']:
                        product_links.append([link['href'], 'Monitor'])

        return product_links
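The opener setup at the top of this method is repeated verbatim in Ejemplo n.º 53 below. As a refactoring sketch only (this helper does not exist in the original project), the boilerplate could be centralized:

    import mechanize

    def install_scraper_opener(user_agent='Mozilla/5.0 (MyProgram/0.1)'):
        # Build a cookie-aware opener with identifying headers and install
        # it globally so later mechanize.urlopen() calls pick it up.
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', user_agent)]
        mechanize.install_opener(opener)
        return cookies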
Ejemplo n.º 50
 def revert_install():
     mechanize.install_opener(old_opener_m)
     urllib2.install_opener(old_opener_u)
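revert_install only makes sense next to setup code that first saved the active openers in the module-level globals old_opener_m and old_opener_u. One plausible counterpart, offered as an assumption about how those globals are populated rather than code from the original project:

    import mechanize
    import urllib2

    # Plain default openers captured up front so revert_install() can
    # restore them after a cookie-aware opener has been installed.
    old_opener_m = mechanize.build_opener()
    old_opener_u = urllib2.build_opener()

    def install_cookie_opener(cookiejar):
        # Globally install cookie-aware openers in both libraries.
        mechanize.install_opener(
            mechanize.build_opener(mechanize.HTTPCookieProcessor(cookiejar)))
        urllib2.install_opener(
            urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)))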
Ejemplo n.º 51
 def getformpage(self, user, password, choice):
     projectgroupurl = "http://leo.rp.edu.sg//workspace/studentModule.asp?site="
     qnnurl = "http://leo3.rp.edu.sg//projectweb/group_evaluation.asp?"
     quizurl = "http://leo3.rp.edu.sg//projectweb/qnn_take.asp?"
     sessionurl = "http://leo3.rp.edu.sg//projectweb/qnn_preset.asp?"
     charturl = "http://leo3.rp.edu.sg//projectweb/response_chart.asp?"
     saveurl = "http://leo3.rp.edu.sg//projectweb/qnn_save_responses.asp"
     urllist = []
     for i in range(1, 4):
         urllist.append(projectgroupurl + str(i))
     
     # retrieve the result
     currentModule = "projectid"
     currentProblem = "groupid"
     try:    
         for url in urllist:
             opener = self.addAuthentication(url, user, password)
             mechanize.install_opener(opener)
             response = mechanize.Request(url)
             page = urlopen(response).read()
             if ("Wrong Password" in page or "Wrong ID" in page):
                 self.vNP.set("Sorry, USERNAME or PASSWORD wrong!")
             elif ('''ToggleDisplay''' in page):
                 currentModule = self.getidlist("projectid", page)[-1]
                 currentProblem = self.getidlist("groupid", page)[-1]
         if (currentModule != "projectid" and currentProblem != "groupid"):
             getqnnurl = qnnurl + currentModule + "&" + currentProblem + "&lang=ISO-8859-1"
             opener = self.addAuthentication(getqnnurl, user, password)
             mechanize.install_opener(opener)
             response = mechanize.Request(getqnnurl)
             getqnnpage = urlopen(response)
             forms = ParseResponse(getqnnpage, backwards_compat=False)
             form = forms[0]
             qnnid = form["qnnid"]
             evalid = form["evalid"]
             opener = self.addAuthentication(getqnnurl, user, password)
             mechanize.install_opener(opener)
             response = mechanize.Request(getqnnurl)
             getqnnpageread = urlopen(response).read()
             author_evaluatorlist = re.findall(r"'\d{5}', '.{38}'", getqnnpageread)
             #for i in range(len(author_evaluatorlist)):
             authorid = author_evaluatorlist[0][1:6]
             evaluatorid = author_evaluatorlist[0][10:-1]
             getsessionurl = sessionurl + "&qnnid=" + qnnid + "&" + currentModule + "&" + currentProblem + "&evalid=" + evalid + "&evaltype=P" + "&authorid=" + authorid + "&evaluatorid=" + evaluatorid + "&lang=ISO-8859-1"
             opener = self.addAuthentication(getsessionurl, user, password)
             mechanize.install_opener(opener)
             response = mechanize.Request(getsessionurl)
             getqnnpage = urlopen(response)
             forms = ParseResponse(getqnnpage, backwards_compat=False)
             form = forms[0]
             form.set_all_readonly(False)
             form["qnnid"] = qnnid            
             form["authorid"] = authorid
             form["evaluatorid"] = evaluatorid
             form["evaltype"] = "P"
             form["lang"] = "ISO-8859-1"
             form["newflag"] = "0"
             form["evalid"] = evalid
             form["groupid"] = currentProblem[8:]
             form["projectid"] = currentModule[10:]
             submit = form.click()
             data = submit.get_data()
             opener = self.addAuthentication(quizurl, user, password)
             mechanize.install_opener(opener)
             response = mechanize.Request(quizurl, data)
             # read once; re-opening the Request would re-submit the POST data
             quizresult = urlopen(response).read()
             sessionid = self.getidlist("sessionid", quizresult)[0]
             answerurl = re.search("(\<FRAME NAME=\"main\" SRC=\")(.+)(\"\>)", quizresult).group(2)
             answerurl = "http://leo3.rp.edu.sg//projectweb/" + answerurl
             opener = self.addAuthentication(answerurl, user, password)
             mechanize.install_opener(opener)
             rs = mechanize.Request(answerurl, data)
             quiz = urlopen(rs)
             quizpage = urlopen(rs).read()
             questionid = re.search(r"\{.+\}num", quizpage).group()[0:-3]
             forms = ParseResponse(quiz, backwards_compat=False)
             form = forms[0]
             self.fillform(form, choice,questionid,sessionid,charturl,user,password)
             form.set_all_readonly(False)
             form["finish"] = "MANUAL"
             print form
             '''
             data = form.click().get_data()
             opener = self.addAuthentication(saveurl, user, password)
             mechanize.install_opener(opener)
             req = mechanize.Request(saveurl, data)
             req.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
             req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
             req.add_header('Accept-Encoding', 'gzip,deflate')
             print urlopen(req).read()
             '''
         else:
             self.vNP.set("Sorry, TODAY NO MODULE!")
     except mechanize.HTTPError, e:
             self.vNP.set("Error:",BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code])
Ejemplo n.º 52
    def login(self,className):
        """
        Login into coursera and obtain the necessary session cookies.
        """
        hn,fn = tempfile.mkstemp()
        cookies = cookielib.LWPCookieJar()
        handlers = [
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(cookies)
        ]

        # prepend a proxy handler if defined
        if(self.proxy):
            proxy = urllib2.ProxyHandler({'http': self.proxy})
            handlers = [proxy] + handlers

        opener = urllib2.build_opener(*handlers)

        url = self.lecture_url_from_name(className)
        req = urllib2.Request(url)

        try:
            res = opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 404:
                raise Exception("Unknown class %s" % className)
            raise  # re-raise unexpected HTTP errors instead of swallowing them

        # get the csrf token
        csrfcookie = [c for c in cookies if c.name == "csrf_token"]
        if not csrfcookie: raise Exception("Failed to find csrf cookie")
        csrftoken = csrfcookie[0].value
        opener.close()

        # call the authenticator url:
        cj = cookielib.MozillaCookieJar(fn)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj),
                                    urllib2.HTTPHandler(),
                                    urllib2.HTTPSHandler())

        opener.addheaders.append(('Cookie', 'csrftoken=%s' % csrftoken))
        opener.addheaders.append(('Referer', 'https://accounts.coursera.org/signin'))
        opener.addheaders.append(('X-CSRFToken', csrftoken))
        req = urllib2.Request(self.LOGIN_URL)

        data = urllib.urlencode({'email': self.username,'password': self.password})
        req.add_data(data)

        try:
            opener.open(req)
        except urllib2.HTTPError as e:
            if e.code == 401:
                raise Exception("Invalid username or password")
            raise  # propagate other HTTP errors rather than ignoring them

        # check if we managed to login
        sessionid = [c.name for c in cj if c.name == "CAUTH"]
        if not sessionid:
            raise Exception("Failed to authenticate as %s" % self.username)

        # all should be ok now, mechanize can handle the rest if we give it the
        # cookies
        br = mechanize.Browser()
        #br.set_debug_http(True)
        #br.set_debug_responses(False)
        #br.set_debug_redirects(True)
        br.set_handle_robots(False)
        br.set_cookiejar(cj)

        if self.proxy:
            br.set_proxies({"http":self.proxy})

        self.browser = br

        # also use this cookiejar for other mechanize operations (e.g., urlopen)
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cj))
        mechanize.install_opener(opener)
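Because the CAUTH session cookie ends up in a MozillaCookieJar bound to a file, it can also be persisted between runs instead of logging in each time. A short sketch of that round trip (the file name is illustrative):

    import cookielib

    cj = cookielib.MozillaCookieJar('coursera_cookies.txt')
    # After a successful login, write out the session cookies...
    cj.save(ignore_discard=True, ignore_expires=True)
    # ...and restore them in a later run without re-authenticating.
    cj.load(ignore_discard=True, ignore_expires=True)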
Ejemplo n.º 53
    def retrieve_product_links(self):
        cookies = mechanize.CookieJar()
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
        opener.addheaders = [('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
            ('From', '*****@*****.**')]
        mechanize.install_opener(opener)

        url_buscar_productos = '/cl/'
        product_links = []
        url_base = 'http://www.dell.com'

        # Start home
        url_extensions = [
            'p/laptops?cat=laptops',
            ]

        for url_extension in url_extensions:
            url_webpage = url_base + url_buscar_productos + url_extension

            r = mechanize.urlopen(url_webpage)
            soup = BeautifulSoup(r.read())

            notebook_lines_container = soup.find('div',
                'tabschegoryGroups')
            notebook_lines = notebook_lines_container.findAll('div',
                recursive=False)

            notebook_urls = []
            for line in notebook_lines:
                for container in line.findAll('div', 'prodImg'):
                    link = container.find('a')['href'].replace('pd', 'fs')
                    notebook_urls.append(url_base + link)

            for notebook_url in notebook_urls:
                for url in self.retrieve_line_links(notebook_url):
                    product_links.append([url, 'Notebook'])

        # Start business

        url_extensions = [
            'empresas/p/laptops',
            ]

        for url_extension in url_extensions:
            url_webpage = url_base + url_buscar_productos + url_extension
            r = mechanize.urlopen(url_webpage)
            soup = BeautifulSoup(r.read())

            product_containers = soup.findAll('div', 'carouselProduct')
            for container in product_containers:
                line_url = url_base + container.find('a')['href']
                for url in self.retrieve_enteprise_links(line_url):
                    product_links.append([url, 'Notebook'])

        # Start Monitor
        url_extensions = [
            '/content/products/compare.aspx/19_22widescreen'
            '?c=cl&cs=cldhs1&l=es&s=dhs',
            '/content/products/compare.aspx/23_30widescreen'
            '?c=cl&cs=cldhs1&l=es&s=dhs',
            '/cl/es/empresas/Monitores/19_15widescreen/cp.aspx'
            '?refid=19_15widescreen&s=bsd&cs=clbsdt1',
            '/cl/es/empresas/Monitores/22_20widescreen/cp.aspx'
            '?refid=22_20widescreen&s=bsd&cs=clbsdt1',
            '/cl/es/empresas/Monitores/30_24widescreen/cp.aspx'
            '?refid=30_24widescreen&s=bsd&cs=clbsdt1',
            '/cl/es/empresas/Monitores/20_19flatpanel/cp.aspx'
            '?refid=20_19flatpanel&s=bsd&cs=clbsdt1',
            ]

        for url_extension in url_extensions:
            url_webpage = url_base + url_extension

            r = mechanize.urlopen(url_webpage)
            soup = BeautifulSoup(r.read())

            links = soup.findAll('a', {'class': 'lnk'})
            for link in links:
                if 'configure' in link['href']:
                    product_links.append([link['href'], 'Screen'])

        return product_links
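retrieve_line_links and retrieve_enteprise_links are called here and in Ejemplo n.º 49 but never defined in these excerpts. Purely as an illustration, with the anchor filter and URL prefix as guesses rather than the project's real logic, such a helper might be:

    import mechanize
    from BeautifulSoup import BeautifulSoup

    def retrieve_line_links(url):
        # Hypothetical helper: collect absolute product-configuration URLs
        # from a product-line page; the 'configure' filter is an assumption.
        soup = BeautifulSoup(mechanize.urlopen(url).read())
        links = []
        for anchor in soup.findAll('a'):
            href = anchor.get('href', '')
            if 'configure' in href:
                links.append(href if href.startswith('http')
                             else 'http://www.dell.com' + href)
        return links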