Example no. 1
File: slurping.py Project: yz-/ut
def slurp_with_login_and_pwd():
    import sys
    import mechanize
    # sys.path.append('ClientCookie-1.0.3')
    # from mechanize import ClientCookie
    # sys.path.append('ClientForm-0.1.17')
    # import ClientForm

    # Create special URL opener (for User-Agent) and cookieJar
    cookieJar = mechanize.CookieJar()

    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookieJar))
    opener.addheaders = [("User-agent","Mozilla/5.0 (compatible)")]
    mechanize.install_opener(opener)
    fp = mechanize.urlopen("http://login.yahoo.com")
    forms = mechanize.ParseResponse(fp)
    fp.close()

    # print forms on this page
    for form in forms:
        print "***************************"
        print form

    form = forms[0]
    form["login"]  = "******" # use your userid
    form["passwd"] = "password"      # use your password
    fp = mechanize.urlopen(form.click())
    fp.close()
    fp = mechanize.urlopen("https://class.coursera.org/ml-003/lecture/download.mp4?lecture_id=1") # use your group
    fp.readlines()
    fp.close()
Example no. 2
    def main(self):
        keyword = remove_polish(word)
        openable = 1
        response = urlopen(uri)
        forms = ParseResponse(response, backwards_compat=False)
        if len(forms)==0:
            os.system("python PyCrawler.py"+" baza.db '"+ uri+"' 1 "+ keyword)
            return
        form = forms[0]

        # search for text input in form and put keyword there
        control = form.find_control(type="text")

        control.value = keyword

        # form.click() returns a mechanize.Request object
        # (see HTMLForm.click.__doc__ if you want to use only the forms support, and
        # not the rest of mechanize)
        request2 = form.click()  # mechanize.Request object
        try:
            response2 = urlopen(request2)
        except:
            print "Nie mozna otworzyc formularza"
            openable = 0
            pass

        #get the url of page
        if not openable:
            search_url=uri
        else:
            search_url = response2.geturl()

        #start crawler on it
        os.system("python PyCrawler.py"+" baza.db '"+ search_url+"' 1 "+ keyword)
def get_vorlage(session_id, url):
    try:
        response = mechanize.urlopen(mechanize.Request(url))
        pprint.pprint(response)
    except URLError:
        return
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    for form in forms:
        # All forms are iterated. Might not all be attachment-related.
        for control in form.controls:
            if control.name == 'DT':
                print control.name, control.value
                request2 = form.click()
                try:
                    response2 = mechanize.urlopen(request2)
                    form_url = response2.geturl()
                    if "getfile.asp" in form_url:
                        #print "ERFOLG:", response2.info()
                        pdf = response2.read()
                        md5 = hashlib.md5(pdf).hexdigest()
                        scraperwiki.sqlite.save(
                            unique_keys=['session_id', 'dt', 'md5', 'size'],
                            data={
                                'session_id': session_id,
                                'dt': control.value,
                                'md5': md5,
                                'size': len(pdf)
                            })
                        continue
                except mechanize.HTTPError, response2:
                    print "HTTP-FEHLER :("
                except URLError:
                    pass
Example no. 4
    def getDLurl(self, url):
        try:
            content = self.getUrl(url)
            match = re.findall('flashvars.playlist = \'(.*?)\';', content)
            if match:
                for url in match:
                    url = 'http://ua.canna.to/canna/' + url
                    content = self.getUrl(url)
                    match = re.findall('<location>(.*?)</location>', content)
                    if match:
                        for url in match:
                            url = 'http://ua.canna.to/canna/' + url
                            req = mechanize.Request(
                                'http://ua.canna.to/canna/single.php')
                            response = mechanize.urlopen(req)
                            req = mechanize.Request(url)
                            req.add_header(
                                'User-Agent',
                                ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
                            )
                            response = mechanize.urlopen(req)
                            response.close()
                            code = response.info().getheader(
                                'Content-Location')
                            url = 'http://ua.canna.to/canna/avzt/' + code
                            print url
                            return url

        except urllib2.HTTPError, error:
            printl(error, self, "E")
            message = self.session.open(MessageBox, ("Fehler: %s" % error),
                                        MessageBox.TYPE_INFO,
                                        timeout=3)
            return False
def rtnHTMLformat(tmpddGenrcgenPresent, sppPrefx, pthwcod, ouPthwpng):
    inpx = '\n'.join(tmpddGenrcgenPresent)  # inpx="ALDH2 color \nALDH3A1	color"
    request = mechanize.Request(
        "http://www.genome.jp/kegg/tool/map_pathway2.html")
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["unclassified"] = inpx
    form["org"] = sppPrefx
    request2 = form.click()
    response2 = mechanize.urlopen(request2)
    a = str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
    code = a.split('/')[0]  # response2.read()
    request = mechanize.Request(
        "http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args" % (code, pthwcod))  # request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%('13171478854246','hsa00410'))
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[1]
    status = ' NOT '
    try:
        imgf = str(forms[1]).split('/mark_pathway')[1].split('/')[0]
        os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s" % (imgf, pthwcod, ouPthwpng))
        status = ' '
    except:
        pass
    return 'A pathway image was%ssuccessfully produced...' % status
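# The wget shell-out above could just as well be done with mechanize itself; a
# minimal sketch, assuming the same http://www.genome.jp/tmp/mark_pathway... URL
# layout and the imgf/pthwcod/ouPthwpng values used in rtnHTMLformat:
def fetch_pathway_png(imgf, pthwcod, ouPthwpng):
    import mechanize
    png_url = 'http://www.genome.jp/tmp/mark_pathway%s/%s.png' % (imgf, pthwcod)
    response = mechanize.urlopen(png_url)
    try:
        with open(ouPthwpng, 'wb') as out:
            out.write(response.read())
    finally:
        response.close()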
	def getDLurl(self, url):
		try:
			content = self.getUrl(url)
			match = re.findall('flashvars.playlist = \'(.*?)\';', content)
			if match:
				for url in match:
					url = 'http://ua.canna.to/canna/'+url
					content = self.getUrl(url)
					match = re.findall('<location>(.*?)</location>', content)
					if match:
						for url in match:
							req = mechanize.Request('http://ua.canna.to/canna/single.php')
							response = mechanize.urlopen(req)
							url = 'http://ua.canna.to/canna/'+url
							req = mechanize.Request(url)
							req.add_header('User-Agent', canna_agent)
							response = mechanize.urlopen(req)
							response.close()
							code=response.info().getheader('Content-Location')
							url='http://ua.canna.to/canna/avzt/'+code
							return url

		except urllib2.HTTPError, error:
			printl(error,self,"E")
			message = self.session.open(MessageBoxExt, (_("Error: %s") % error), MessageBoxExt.TYPE_INFO, timeout=3)
			return False
Example no. 7
 def reply_to(self, postid, message):
     params = urllib.urlencode({
         'post_parent_id': postid,
         'snId': 0,
         'post_text': message
     })
     mechanize.urlopen(self.base_url + "/actions/post.php", params)
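# reply_to works because mechanize.urlopen, like urllib2.urlopen, switches to a
# POST request whenever a data string is passed as the second argument.  A
# self-contained sketch of the same idea (endpoint URL and field names are only
# illustrative):
import urllib
import mechanize

def post_form(base_url, fields):
    data = urllib.urlencode(fields)  # e.g. {'post_parent_id': 42, 'post_text': 'hi'}
    response = mechanize.urlopen(base_url + '/actions/post.php', data)
    try:
        return response.read()
    finally:
        response.close()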
Example no. 8
def connect(url, username, password):
    try:
        if connected(url):
            raise LinkException('You are already connected')
        try:
            response = urlopen(url)
        except URLError:
            raise SSIDException('You are not connected on a FON box')

        forms = ParseResponse(response, backwards_compat=False)

        try:
            form = forms[0]
            form["login[user]"] = username
            form["login[pass]"] = password
        except IndexError:
            raise SSIDException('You are not connected on a FON box')

        try:
            response_page = urlopen(form.click()).read()
        except NameError:
            raise SSIDException('You are not connected on a FON box')

        return not 'class="form_error"' in response_page
    except PlainURLError:
        if connected(url):
            return True
        else:
            raise RuntimeError("Connection failed.")
Example no. 9
def _get_results(form, dbg = False):
    # click the form
    clicked_form = form.click()
    # then get the results page
    result = mechanize.urlopen(clicked_form)

    #### EXPORTING RESULTS FILE
    # so what I do is that I fetch the first results page,
    # click the form/link to get all hits as a colon separated
    # ascii table file
    
    # get the form
    resultform = mechanize.ParseResponse(result, backwards_compat=False)
    result.close()
    resultform = resultform[0]
    # set colon as delimiter of the table (could use anything I guess)
    #~ resultform.find_control('export_delimiter').items[1].selected =  True
    resultform.find_control('export_delimiter').toggle('colon')
    resultform_clicked = resultform.click()
    result_table = mechanize.urlopen(resultform_clicked)
    data = result_table.read()
    result_table.close()
    if dbg:
        return resultform, result_table, data
    else:
        return data
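# _get_results expects an already-filled mechanize HTMLForm.  A minimal sketch of a
# call site, assuming a hypothetical search page whose first form has a text control
# named 'query':
def _search(search_url, term):
    import mechanize
    response = mechanize.urlopen(search_url)
    form = mechanize.ParseResponse(response, backwards_compat=False)[0]
    response.close()
    form['query'] = term  # 'query' is only an assumed field name
    return _get_results(form)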
Example no. 10
 def login(self, username, password):
     response = urlopen(urljoin(self.uri, "/?cs=login"))
     forms = ParseResponse(response, backwards_compat=False)
     form = forms[0]
     form.set_value(username, name='username')
     form.set_value(password, name='password')
     self.page = urlopen(form.click())
def loginRedmine():
	forms = ParseResponse(urlopen(URL_LOGIN))
	form = forms[0]
	form['username'] = USER_ID
	form['password'] = USER_PW
	request = form.click()
	mechanize.urlopen(request)
Example no. 12
def connect(url, username, password):
    try:
        if connected(url):
            raise LinkException('You are already connected')
        try:
            response = urlopen(url)
        except URLError:
            raise SSIDException('You are not connected on a FON box')

        forms = ParseResponse(response, backwards_compat=False)

        try:
            form = forms[0]
            form["login[USERNAME]"] = username
            form["login[PASSWORD]"] = password
        except IndexError:
            raise SSIDException('You are not connected on a FON box')

        try:
            response_page = urlopen(form.click()).read()
        except NameError:
            raise SSIDException('You are not connected on a FON box')

        return not 'class="form_error"' in response_page
    except PlainURLError:
        if connected(url):
            return True
        else:
            raise RuntimeError("Connection failed.")
Example no. 15
    def test_sending_headers(self):
        handler = self._make_request_handler([(200, [], "we don't care")])

        req = mechanize.Request("http://localhost:%s/" % handler.port,
                                headers={'Range': 'bytes=20-39'})
        mechanize.urlopen(req)
        self.assertEqual(handler.received_headers['Range'], 'bytes=20-39')
Example no. 17
File: ivw.py Project: g-div/ivw-viz
    def __get_csv(self, letter='a', now=False):

        #open the url
        current_url = self.overview_url + '1111&b=' + letter
        overview_req = mechanize.Request(current_url)
        overview_res = mechanize.urlopen(overview_req)

        #find the list of entries to post
        py_query = PyQuery(overview_res.read())
        titlelist = py_query("input[name='titelnrliste']").val()

        #create the post request
        post_data = {
            'url': current_url,
            'download': '[Download]',
            'titelnrliste': titlelist
        }

        if (now):
            #find the checked box (the current quartal)
            default_quartal = py_query(".quartal input:checked").attr('name')
            post_data[str(default_quartal)] = 'ON'
        else:
            #enable all quartal's checkbox
            quartals = [1, 2, 3, 4]
            for i in quartals:
                if i in range(1, 5):
                    post_data[str(self.year) + str(i)] = 'ON'

        #send the post request
        csv_req = mechanize.Request(current_url, urllib.urlencode(post_data))
        csv_res = mechanize.urlopen(csv_req)
        self.csv_parser.process_result(response=csv_res)
Example no. 20
 def login1(self):
     self.brow = mechanize.Browser()
     
     httpHandler = mechanize.HTTPHandler()
     httpsHandler = mechanize.HTTPSHandler()
     
     httpHandler.set_http_debuglevel(DEBUG)
     self.cookiejar = mechanize.LWPCookieJar()
     #self.cookiejar = "Cookie    lzstat_uv=34741959842666604402|1786789; Hm_lvt_976797cb85805d626fc5642aa5244ba0=1304534271541; ASPSESSIONIDQCDRAQBB=JHCHINLAHGMAIGBIFMNANLGF; lzstat_ss=2189193215_2_1304564199_1786789; Hm_lpvt_976797cb85805d626fc5642aa5244ba0=1304535401191"
     self.opener = mechanize.OpenerFactory(mechanize.SeekableResponseOpener).build_opener(
                                     httpHandler,httpsHandler,
                                     mechanize.HTTPCookieProcessor(self.cookiejar),
                                     mechanize.HTTPRefererProcessor,
                                     mechanize.HTTPEquivProcessor,
                                     mechanize.HTTPRefreshProcessor,
                                     )
     self.opener.addheaders = [("User-Agent","Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13"),
                          ("From", "")]
     #self.opener.addheaders = [(
      #                     "Referer", self.data['postUrl']
     #                      )]
     login={}
     login['method'] = self.data['method']
     login['name'] = self.data['name']
     login['pwd'] = self.data['pwd']
     loginUrl = self.data['loginUrl']+'?'+urllib.urlencode(login)
     print loginUrl
     response = mechanize.urlopen("http://esf.soufun.com/")
     response = mechanize.urlopen(loginUrl)
     print response.read().decode('gb2312')
Example no. 23
def login(conf):
    try:
        username = conf.username
        password = conf.password
        request = mechanize.Request('%s/login.php' % conf.website)
        response = mechanize.urlopen(request, timeout=conf.timeout)
        forms = mechanize.ParseResponse(response)
        response.close()

        if len(forms) < 3:
            return Left('Failed to reach the login page.')

        form = forms[2]
        form['username'] = username
        form['password'] = password
        login_request = form.click()

        login_response = mechanize.urlopen(login_request, timeout=conf.timeout)
        logged_in = login_response.geturl() == ('%s/index.php'
                                                % conf.website)

        if not logged_in:
            return Left('Failed to log in with these credentials')

    except mechanize.HTTPError as resp:
        return Left('HTTPError when logging in: %s' % resp)
    except Exception as e:
        return Left('%s' % e)

    if conf.verbose: sys.stdout.write('Logged in as %s\n' % username)
    return Right('Logged in as %s' % username)
def getPropertyPins(streetName):

    url = r'https://taxcommissioner.dekalbcountyga.gov/TaxCommissioner/TCSearch.asp'
    request = mechanize.Request(url)
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    response.close()

    form = forms[0]

    form['StreetName'] = sys.argv[1]
    propertyList = mechanize.urlopen(form.click()).read()

    tree = html.fromstring(propertyList)
    pins = tree.xpath('//tr/td[1]/a/@href')
    addresses = tree.xpath('//tr/td[1]/a/text()')

    pinList = []
    i = 0
    for pin in pins:
        #print pin
        newpin = pin.split('=')
        pinList.append([newpin[3], addresses[i]])
        print newpin[3] + '\t' + addresses[i]
        i = i + 1

    return pinList
Example no. 26
def generate_script_sieve(request, group_id):
    group = get_object_or_404(Group, id=group_id)

    if request.method == 'POST':
        if group.always_data_id:
            # There is always_data mail id
            from mechanize import ParseResponse, urlopen, urljoin
            response = urlopen("https://admin.alwaysdata.com/login/")
            forms = ParseResponse(response, backwards_compat=False)
            login_form = forms[0]

            if settings.DEBUG:
                print login_form

            login_form["email"] = settings.ALWAYS_DATA_ID
            login_form["password"] = settings.ALWAYS_DATA_PASSWORD
            
            response = urlopen(login_form.click())
            url = 'https://admin.alwaysdata.com/email/%d/' % group.always_data_id
            response = urlopen(url)

            forms = ParseResponse(response, backwards_compat=False)

            if settings.DEBUG:
                for form in forms:
                    print form
            try:
                email_form = forms[1]
            except IndexError:
                messages.warning(request, _(u'%(group)s is not bind to alwaysdata yet (wrong password)' % 
                                          {'group': group}))
                
                return HttpResponseRedirect(reverse("group-detail", args=[group.pk]))

            email_form['sieve_filter'] = request.POST['filter_sieve'].encode('utf-8')

            req = email_form.click()
            req.add_header("Referer", url)
            response = urlopen(req)

            messages.success(request, _(u'Alwaysdata has been updated'))

        else:
            messages.warning(request, _(u'%(group)s is not bind to alwaysdata yet' % 
                                        {'group': group}))

        return HttpResponseRedirect(reverse("group-detail", args=[group.pk]))
            
    else:
        filter_sieve = export_sieve_configuration(group.contacts.all())
        
        context = get_global_context_data(Group, Group._meta.app_label)
        context['object_list'] = Group.objects.all()
        context['object'] = group
        context['filter_sieve'] = filter_sieve
        
        return render_to_response('contact/contact-sieve.html',
                                  context,
                                  context_instance=RequestContext(request))
    def test_404(self):
        expected_response = 'Bad bad bad...'
        handler = self._make_request_handler([(404, [], expected_response)])

        try:
            mechanize.urlopen('http://localhost:%s/weeble' % handler.port)
        except mechanize.URLError, f:
            pass
Example no. 29
def get_nyc_legislation():  #search_terms=''
    book = xlwt.Workbook(encoding='utf-8', style_compression = 0)
    sheet = book.add_sheet('Legi', cell_overwrite_ok = True)  
    row=-1

    for items in ['smoking']:
        url = r'http://legistar.council.nyc.gov/Legislation.aspx'
        request = mechanize.Request(url)
        response = mechanize.urlopen(request)
        forms = mechanize.ParseResponse(response, backwards_compat=False)
        form = forms[0]
        response.close()

        form['ctl00$ContentPlaceHolder1$txtSearch'] = items
        submit_page = mechanize.urlopen(form.click())
        soup = BeautifulSoup(submit_page.read())

        for link in soup.find_all("a"):
            legislation = link.get("href")
            try:
                if 'LegislationDetail' in legislation:
                    url_stem = 'http://legistar.council.nyc.gov/'
                    url2 = "%s%s" % (url_stem, legislation)

                    request2 = requests.get(url2)
                    soup2 = BeautifulSoup(request2.content)
                    type = soup2.find_all("span",{"id":"ctl00_ContentPlaceHolder1_lblType2"})
                    status = soup2.find_all("span",{"id":"ctl00_ContentPlaceHolder1_lblStatus2"})
                    print url2

                    if ((type[0].text == "Resolution" or 
                        type[0].text == "Introduction") and 
                        (status[0].text == "Adopted")):

                        legislation_title = soup2.find_all("span",{"id":"ctl00_ContentPlaceHolder1_lblName2"})
                        legislation_date = soup2.find_all("span",{"id":"ctl00_ContentPlaceHolder1_lblOnAgenda2"})
                        legislation_committee = soup2.find_all("a",{"id":"ctl00_ContentPlaceHolder1_hypInControlOf2"})
                        legislation_text = soup2.find_all("span",{"class":"st1"})                                

                        legi_url, title, date, committee, text = ([] for i in range(5))
                        row = row + 1 

                        legi_url = url2                
                        for item in legislation_title:
                            title.append(item.text)
                        for item in legislation_date:
                            date.append(item.text)
                        for item in legislation_committee:
                            committee.append(item.text)
                        for item in legislation_text:
                            text.append(' '+item.text)

                        legi = [legi_url,title,date,committee,text]
                        for column, var_observ in enumerate(legi):
                            sheet.write (row, column, var_observ)
            except:
                pass
    book.save("legislation_data.xls")
Example no. 30
def install_phpBB():
    print "installing phpBB..."
    #create forum
    url = "http://" + ip + "/phpBB3/"
    install_url = url + "install/index.php?mode=install&sub="
    br = mechanize.Browser()
    post = "dbms=mysqli&dbhost=" + ip  +  "&dbport=" + port +  "&dbname=cryptdb_phpbb&dbuser=root&dbpasswd=letmein&table_prefix=phpbb_&admin_name=admin&admin_pass1=letmein&admin_pass2=letmein&[email protected]&[email protected]"
    config = mechanize.urlopen(install_url+"config_file", data=post);
    br.set_response(config)
    post += "&email_enable=1&smtp_delivery=0&smtp_host=&smtp_auth=PLAIN&smtp_user=&smtp_pass=&cookie_secure=0&force_server_vars=0&server_protocol=http://&server_name=18.26.5.16&server_port=80&script_path=/phpBB"
    advanced = mechanize.urlopen(install_url+"advanced", data=post);
    br.set_response(advanced)
    br.select_form(nr=0)
    br.submit()
    br.select_form(nr=0)
    br.submit()

    os.system("mv $EDBDIR/../apps/phpBB3/install $EDBDIR/../apps/phpBB3/install2")

    print "logging in..."
    #login
    br.open(url+"ucp.php?mode=login")
    br.select_form(nr=1)
    br["username"] = "******"
    br["password"] = "******"
    br.submit()
    print "to ACP..."
    #authenticate to go to ACP
    br.follow_link(text="Administration Control Panel")
    br.select_form(nr=1)
    i = str(br.form).find("password")
    j = str(br.form).find("=)",i)
    br[str(br.form)[i:j]] = "letmein"
    br.submit()
    print "getting permissions page..."
    #navigate to group permissions
    br.follow_link(text="Permissions")
    br.follow_link(text="Groups\xe2\x80\x99 permissions")
    #select Newly Registered Users
    br.select_form(nr=0)
    br["group_id[]"] = ["7"]
    br.submit()
    #set all permissions to yes
    print "setting permissions..."
    br.select_form(nr=1)
    i = 1
    while i > 0:
        start = str(br.form).find("setting[7][0][",i)
        if (start < 0):
            break
        end = str(br.form).find("=[",start)
        if (end < 0):
            break
        br[str(br.form)[start:end]] = ["1"]
        i = end
    br.submit()
def doLogin():
    url = "https://awesome-hd.net/login.php"
    response = mechanize.urlopen(url)

    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["username"] = s.username
    form["password"] = s.password

    mechanize.urlopen(form.click())
Example no. 32
 def isThereInternet(self):
     try:
         mechanize.urlopen('http://google.com', timeout=1)
     except mechanize.URLError as e:
         print "There is no internet: {}".format(e)
         self.error_msg.setText("You are not connected to the internet")
         self.error_msg.setDetailedText("This feature will not work without an internet connection. ")
         self.error_msg.exec_()
         return False
     else:
         return True
Example no. 33
    def isThereInternet(self):
        try:
            mechanize.urlopen('http://github.com', timeout=1)
            return True
        except mechanize.URLError as e:
            print "There is no internet {}".format(e)

            self.error_msg.setText("You are not connected to the internet")
            self.error_msg.setDetailedText("This feature will not work without an internet connection. ")
            self.error_msg.exec_()
            return False
Example no. 34
    def isThereInternet(self):
        try:
            mechanize.urlopen('http://github.com', timeout=1)
            return True
        except mechanize.URLError as e:
            print "There is no internet {}".format(e)

            self.error_msg.setText("No internet")
            self.error_msg.setDetailedText(
                "If this keeps happening, it means easy repo stumbled upon a "
                "forbidden link. You might need to change your search string")
            self.error_msg.exec_()
            return False
Example no. 35
def randWord(wordType, wordComplexity):

	randURL = 'http://watchout4snakes.com'

	response = urlopen(urljoin(randURL, '/wo4snakes/Random/RandomWordPlus'))

	forms = ParseResponse(response, backwards_compat=False)
	form = forms[0]

	form['Pos'] = [wordType]
	form['Level'] = [wordComplexity]

	return urlopen(form.click()).read()
Example no. 36
def get_bclerk_results_text(case):
    print('get_bclerk_results_text(' + case + ')')
    uri = 'http://web1.brevardclerk.us/oncoreweb/search.aspx'
    response = urlopen(uri, timeout=5)
    forms = ParseResponse(response, backwards_compat=False)
    form = forms[0]
    #     print form
    form["txtCaseNumber"] = case  #"orozco"
    form["SearchType"] = 'casenumber'  #"orozco"
    form["txtDocTypes"] = ''  #'JRP, J' #"orozco"
    # form["txtName"] = "orozco"
    #     time.sleep(1)
    bclerk_results_text = urlopen(form.click()).read()
    return bclerk_results_text
Example no. 37
def get_rows_from_graframe_url(url3, radius):
    # print('get_rows_from_graframe_url(%s, %s)' % (url3, radius))
    r3 = urlopen(url3)
    forms = ParseResponse(r3, backwards_compat=False)
    # for f in forms:
    # print f
    form = forms[0]
    # print(form)
    form["select"] = ['Sales']
    form["radius"] = [radius]
    # print(form)
    rows = get_nearby_from_input(urlopen(form.click()))
    # pprint.pprint(rows)
    return rows
Example no. 38
def get_bclerk_results_text(case):
    print('get_bclerk_results_text('+case+')')
    uri = 'http://web1.brevardclerk.us/oncoreweb/search.aspx'
    response = urlopen(uri, timeout=5)
    forms = ParseResponse(response, backwards_compat=False)
    form = forms[0]
#     print form
    form["txtCaseNumber"] = case #"orozco"
    form["SearchType"] = 'casenumber' #"orozco"
    form["txtDocTypes"] = ''#'JRP, J' #"orozco"
# form["txtName"] = "orozco"
#     time.sleep(1)
    bclerk_results_text = urlopen(form.click()).read()
    return bclerk_results_text
Example no. 39
def get_rows_from_graframe_url(url3, radius):
    # print('get_rows_from_graframe_url(%s, %s)' % (url3, radius))
    r3 = urlopen(url3)
    forms = ParseResponse( r3, backwards_compat=False)
    # for f in forms:
        # print f
    form = forms[0]
    # print(form)
    form["select"] = ['Sales']
    form["radius"] = [radius]
    # print(form)
    rows=get_nearby_from_input(urlopen(form.click()))
    # pprint.pprint(rows)
    return rows
Example no. 40
def grab_redirect(link):
    response = mechanize.urlopen(link['href'])
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    data = mechanize.urlopen(form.click()).read()
    soup = BeautifulSoup.BeautifulSoup(data)
    for div in soup('div'):
        if 'class' in dict(div.attrs) and \
           div['class'] == 'urlworkaround':
            txt = ''.join([str(x) for x in div.contents])
            lsoup = BeautifulSoup.BeautifulSoup(txt)
            link = lsoup('a')[0]
            return link['href']
    raise Exception('no href')
Example no. 41
def getStartPageAndInputPassword():
    my_browser = mechanize.Browser(factory=mechanize.RobustFactory())
    my_browser.set_handle_robots(False)
    
    request = mechanize.Request("http://www.bokat.se/protected/groupInfo.jsp")
    response = mechanize.urlopen(request)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    loginForm = forms[0]
    loginForm["j_username"] = "******"
    loginForm["j_password"] = "******"
     
    try:
        response = mechanize.urlopen(loginForm.click())
    except HTTPError, e:
        sys.exit("post failed: %d: %s" % (e.code, e.msg))
def doSearch(movieTitle, movieYear):
    # Convert non-unicode characters
    movieTitle = removeNonUnicodeChars(movieTitle)

    url = "https://awesome-hd.net/torrents.php"
    response = mechanize.urlopen(url)

    # Search
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form["searchstr"] = movieTitle

    html = mechanize.urlopen(form.click()).readlines()

    return html
Example no. 43
def start_cloning(options):
    link = options['link']
    user = options['user']
    password = options['password']
    response = mechanize.urlopen(link)
    forms = mechanize.ParseResponse(response, backwards_compat=False)
    form = forms[0]
    form['txtIdentifiant'] = user
    form['txtMDP'] = password
    website = mechanize.urlopen(form.click())
    data = website.read()
    outfile = open('index.html', 'wt')
    print >> outfile, """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html dir="ltr" lang="fr" xml:lang="fr" xmlns="http://www.w3.org/1999/xhtml"
class="yui3-js-enabled" id="yui_3_2_0_1_1326674808791714">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
"""
    soup = BeautifulSoup.BeautifulSoup(data)
    title = soup('title')
    print >> outfile, str(title[0])
    divs = soup('div')
    for div in divs:
        if 'class' in dict(div.attrs):
            if div['class'] == 'course-content':
                vstr = '\n'.join([str(x) for x in div.contents[1:]])
                # Eliminate wrong divs
                lsoup = BeautifulSoup.BeautifulSoup(vstr)
                for ldiv in lsoup.findAll('div'):
                    if ('class' in dict(ldiv.attrs) and ldiv['class']
                            in ['left side', 'right side', 'jumpmenu']):
                        ldiv.extract()
                replace = {}
                for link in lsoup.findAll('a'):
                    if 'href' in dict(link.attrs):
                        try:
                            replace[link['href']] = grab_redirect(link)
                        except:
                            pass
                page_txt = str(lsoup)
                for k, v in replace.items():
                    nw_key = str(k) + "&amp;redirect=1"
                    page_txt = page_txt.replace(nw_key, str(v))
                    page_txt = page_txt.replace(str(k), str(v))
                print >> outfile, page_txt
    outfile.close()
Example no. 44
    def get_attachment_file(self, attachment, form):
        """
        Loads the attachment file from the server and stores it into
        the attachment object given as a parameter. The form
        parameter is the mechanize Form to be submitted for downloading
        the attachment.

        The attachment parameter has to be an object of type
        model.attachment.Attachment.
        """
        time.sleep(self.config.WAIT_TIME)
        logging.info("Getting attachment '%s'", attachment.identifier)
        if self.options.verbose:
            print "Getting attachment '%s'" % attachment.identifier
        mechanize_request = form.click()
        try:
            mform_response = mechanize.urlopen(mechanize_request)
            mform_url = mform_response.geturl()
            if self.list_in_string(self.urls['ATTACHMENT_DOWNLOAD_TARGET'],
                                   mform_url):
                attachment.content = mform_response.read()
                attachment.mimetype = magic.from_buffer(attachment.content,
                                                        mime=True)
                attachment.filename = self.make_attachment_filename(
                    attachment.identifier, attachment.mimetype)
            else:
                logging.warn("Unexpected form target URL '%s'", mform_url)
                if self.options.verbose:
                    sys.stderr.write("Unexpected form target URL '%s'\n" %
                                     mform_url)
        except mechanize.HTTPError as e:
            logging.warn("HTTP Error: code %s, info: %s", e.code, e.msg)
            if self.options.verbose:
                print "HTTP-FEHLER:", e.code, e.msg
        return attachment
Example no. 45
def send_to_WEBFORM(img_file):
    printlog("Send WEB FORM test")
    myWEB = 'http://185.139.68.199/timebox/default/display_form.html'
    FILENAME = ''
    FILENAME = img_file
    printlog('Send test' + img_file)
    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'
    )]
    tri = 0
    connect = False
    while not connect:
        try:
            rr = br.open(myWEB)
            response = urlopen(
                "http://185.139.68.199/timebox/default/display_form.html")
            br.select_form(nr=0)

            br['cam_id'] = ['1']
            connect = True

        except mechanize.URLError as e:
            print e.reason.args
            tri += 1
            if tri > 4:
                exit()
            sleep(20)

    br.form.add_file(open(FILENAME), 'text/plain', FILENAME)
    br.form.set_all_readonly(False)
    br.submit()
Example no. 46
    def get_attachment_file(self, attachment, form):
        """
        Loads the attachment file from the server and stores it into
        the attachment object given as a parameter. The form
        parameter is the mechanize Form to be submitted for downloading
        the attachment.

        The attachment parameter has to be an object of type
        model.attachment.Attachment.
        """
        time.sleep(self.config.WAIT_TIME)
        logging.info("Getting attachment '%s'", attachment.identifier)
        if self.options.verbose:
            print "Getting attachment '%s'" % attachment.identifier
        mechanize_request = form.click()
        try:
            mform_response = mechanize.urlopen(mechanize_request)
            mform_url = mform_response.geturl()
            if self.list_in_string(self.urls['ATTACHMENT_DOWNLOAD_TARGET'], mform_url):
                attachment.content = mform_response.read()
                attachment.mimetype = magic.from_buffer(attachment.content, mime=True)
                attachment.filename = self.make_attachment_filename(attachment.identifier, attachment.mimetype)
            else:
                logging.warn("Unexpected form target URL '%s'", mform_url)
                if self.options.verbose:
                    sys.stderr.write("Unexpected form target URL '%s'\n" % mform_url)
        except mechanize.HTTPError as e:
            logging.warn("HTTP Error: code %s, info: %s", e.code, e.msg)
            if self.options.verbose:
                print "HTTP-FEHLER:", e.code, e.msg
        return attachment
def get_html(url, file_html=None):
    '''
        Downloads the HTML for a given URL.

        @url: URL whose HTML is to be extracted
        @file_html: File the HTML retrieved from the given URL should be written to.
                    If None, no file is written.
        @return: The HTML of the URL passed as a parameter.
    '''

    try:
        link_expandido = mechanize.urlopen(url, timeout=45.0).geturl()  # Expand the URL first; if it is an ad/redirect link, the article URL is returned
        browser.open(link_expandido, timeout=45.0)  # Open the URL
        html = browser.response().read().replace('\r', '\n').replace('DIV', 'div')  # Normalize line breaks and divs
    except:
        try:
            # In case Mechanize fails for any reason
            html = urllib2.urlopen(url).read()
        except:
            browser.close()
            return 'TIMEOUT'  # Page could not be loaded - this branch is not expected to run!

    # Save the HTML to the file given as a parameter
    if file_html != None:
        arq = open(file_html, "w")
        print >> arq, html
        arq.close()

    browser.close()

    return html
Example no. 48
def get_ics_calendar(url=URL):
    """
    Get icalendar from website.
    """
    response = mechanize.urlopen(url)
    response = response.read()
    return response
def readUrl(inUrl):

    tryCount = 0
    while tryCount < 5 :
#        print "Create CookieJar"
        cookies = mechanize.CookieJar()
#        print "Build Opener"
        opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
#        print "Add Headers"
        opener.addheaders = [("User-agent", "Mozilla/5.0 (compatible; MyProgram/0.1)"),("From", "*****@*****.**")]
#        print "Install Opener"
        mechanize.install_opener(opener)
        try:
#            print "Open URL"
            response = mechanize.urlopen(inUrl)
            tryCount = 99
        except:
            tryCount += 1
            print "******** Error on urlopen ***********"
            print "URL: ", inUrl
            print "Trying Again....", tryCount

#    print response.read()
#    html = urllib.urlopen(inUrl).read()
#    print "Reading Response"
    html = response.read()
#    print "Response Read:", html[0:100]
    root = lxml.html.fromstring(html)
#    print "Root created: ", root

    return root
	def getUrl(self,url):
		req = mechanize.Request(url)
		req.add_header('User-Agent', canna_agent)
		response = mechanize.urlopen(req)
		link = response.read()
		response.close()
		return link
Example no. 51
def parse(str_to_parse=""):
    results = []
    tree = html.fromstring(str_to_parse)
    tbl = tree.xpath(table_xpath)[0]

    for row in tbl.xpath(row_xpath):
        result_dict = {}

        if within_date_range(row) and within_price_range(row):
            ln = row.xpath('./td[2]/a')[0]
            result_dict['url'] = ln.get('href')
            result_dict['prop_name'] = ln.text.strip()
            pg_response = urlopen(result_dict['url'])
            new_tree = html.fromstring(pg_response.read())
            adparams = new_tree.xpath('//table[@class="AdParams"]//tr/td')

            #print 'adparams is %s' % adparams

            for param in adparams:
                txt = ''.join(param.xpath('.//text()')).strip().replace(
                    '\n', '').replace('\t', '').split(':')
                result_dict[txt[0]] = txt[1]
            #print 'result_dict is %s' % result_dict

            results.append(result_dict)
    return results
Example no. 52
	def getUrl(self,url):
		req = mechanize.Request(url)
		req.add_header('User-Agent', ' Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
		response = mechanize.urlopen(req)
		link = response.read()
		response.close()
		return link
Example no. 53
def downloadtable(url):
    br = mechanize.urlopen(url)
    soup = BeautifulSoup.BeautifulSoup(br.read())

    table = soup.find(id='myTable')
    for row in table('tr'):
        yield [i.string for i in row('td')]
Example no. 54
def get(request, offset):
    import mechanize
    import json

    allow = offset
    dont_allow = ""
    if "+not+" in offset:
        [allow, dont_allow] = offset.split("+not+")
    s = ""
    for word in allow.split("+"):
        s += "&allowedIngredient[]=%s" % (word)
    for word in dont_allow.split("+"):
        s += "&excludeIngredient[]=%s" % (word)
    url = "http://api.yummly.com/v1/api/recipes?_app_id=%s&_app_key=%s%s&requirePictures=true" % (APP_ID, APP_KEY, s)
    res = mechanize.urlopen(url)
    page = "".join(str(line) for line in res)
    result = json.loads(page)
    results = []

    for re in result["matches"]:
        re["smallImageUrls"] = [re["smallImageUrls"][0].replace(".s.", ".l.")]
        name = re["recipeName"]
        if len(name) > 40:
            name = name[:37] + "..."
        re["recipeName"] = name
        results.append(re)
    return render_to_response(
        "search.html",
        {"search_results": results, "term": offset.replace("+", ", ").title()},
        context_instance=RequestContext(request),
    )
Example no. 55
    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        handler = self._make_request_handler([(200, [], "we don't care")])

        open_url = mechanize.urlopen("http://localhost:%s" % handler.port)
        url = open_url.geturl()
        self.assertEqual(url, "http://localhost:%s" % handler.port)
Example no. 56
  def _scrapeUrl(self, url):
      """scrape a generic url
      """
      #grab the data -- go internets!
      request3 = mechanize.Request(url)
      self.cj.add_cookie_header(request3)
      response3 = mechanize.urlopen(request3)
      maincontent = response3.read()
      #make the soup
      soup = BeautifulSoup(maincontent)
      
      #parse the soup
      #This thing is a beast

      # date/times and games are intersperced
      # The first thing should be a date
      # then all games following are on that date
      # So - we find all dates and games with our query and handle them
      # as they happen in order
      date=None
      tags = soup.findAll(**{'class':["schedules-list-date", 'schedules-list-hd pre', 'schedules-list-hd post']})
      print "found %s tags" %len(tags)
      for tag in tags:
        #we got a date!
        if tag['class']=='schedules-list-date':
          #we've found a new date
          gameDateStr = str(tag.find('span').text)
          monthStr, date = gameDateStr.split(',')[1].strip().split()
          monthNum = self.MONTH_MAP[str(monthStr)]
          if monthNum in (1,2):
            year = self.year+1
          else:
            year = self.year
          dateInt = int(''.join([x for x in date if x.isdigit()]))
          date = datetime.date(year, monthNum, dateInt) 
        else:
          #we've got a new game -parse out home and away team
          home = str(tag.find(**{'class':['team-name home ', 'team-name home lost']}).text)
          away = str(tag.find(**{'class':['team-name away ', 'team-name away lost']}).text)
          #need to get the time as well
          time = str(tag.find(**{'class':'time'}).text)
          if time=='FINAL':
            print "CANNOT GET VALID TIME FOR GAMES that are in the past" 
            hr=0
            minute=0
          else:
            hr, minute = time.split(':')
            amPm = str(tag.find(**{'class':['am', 'pm']}).text).strip()
            hr = int(hr)
            minute=int(minute)
            #adjust times to take into account am/pm  
            if amPm=="PM" and hr <12:      
              hr+=12
            if amPm=="AM" and hr==12:
              hr=0
          d={'week':self.week,
             'home':self.TEAM_MAP[home],
             'away':self.TEAM_MAP[away],
             'kickoff':datetime.datetime(date.year, date.month, date.day, hr, minute, tzinfo=self.EASTERN_TIME_ZONE)}
          self.games.append(d)