def process_event(self, event):
    url, data = self.build_url_and_data(event)
    headers = {"Content-Type": "application/json"}
    try:
        urlopen(url, data=data, http_headers=headers.items())
        return True
    except IOError:
        return False
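# A minimal sketch of the build_url_and_data() helper that process_event()
# above assumes -- hypothetical, not from the original source. urlgrabber's
# urlopen() accepts a data= option that turns the request into an HTTP POST,
# so the helper would return the endpoint URL plus an encoded payload.
# self.endpoint is a hypothetical attribute on the same class.
import json

def build_url_and_data(self, event):
    return self.endpoint, json.dumps({"event": event})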
def test(self, timeout=3):
    import urlgrabber
    try:
        urlgrabber.urlopen(self.url.get_uri(),
                           http_headers=self._get_http_headers(),
                           ftp_headers=self._get_ftp_headers(),
                           proxies=self._get_proxies(),
                           timeout=timeout,
                           user_agent='PiSi Fetcher/' + pisi.__version__)
    except urlgrabber.grabber.URLGrabError:
        return False
    return True
def versionFromFile(f):
    """Given a file or URL, look for a line starting with #version= and
       return the version number.  If no version is found, return DEVEL.
    """
    v = DEVEL

    fh = urlopen(f)

    while True:
        try:
            l = fh.readline()
        except StopIteration:
            break

        # At the end of the file?
        if l == "":
            break

        if l.isspace() or l.strip() == "":
            continue

        if l[:9] == "#version=":
            v = stringToVersion(l[9:].rstrip())
            break

    fh.close()
    return v
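# Hedged usage note: because urlgrabber's urlopen() accepts local paths and
# remote URLs alike, versionFromFile() works on either. DEVEL and
# stringToVersion() are assumed to come from the surrounding version module
# (pykickstart-style), e.g.:
#
#   ver = versionFromFile("/root/ks.cfg")
#   ver = versionFromFile("http://example.com/ks.cfg")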
def __parse_jeos_images(self):
    log = logging.getLogger('%s.%s' % (__name__, self.__class__.__name__))
    config_urls = self.configuration['jeos_config']

    for url in config_urls:
        filehandle = urlopen(str(url))
        line = filehandle.readline().strip()
        line_number = 1
        while line:
            # Lines that start with '#' are a comment
            if line[0] == "#":
                pass
            # Lines that are zero length are whitespace
            elif len(line.split()) == 0:
                pass
            else:
                image_detail = line.split(":")
                if len(image_detail) >= 6:
                    self.__add_jeos_image(image_detail)
                else:
                    log.warning("Failed to parse line %d in JEOS config (%s):\n%s" % (line_number, url, line))
            line = filehandle.readline()
            line_number += 1
        filehandle.close()
def get_remote_hash(self, branch):
    '''Return the git-hash for the most recent commit on the specified branch'''
    assert isinstance(branch, str), "branch argument must be a string"

    u = urlparse.urlparse(self.git_url)
    if u.scheme == 'git':
        if u.netloc == 'github.com':
            # Formulate API call
            json_url = "http://%s/api/v2/json/repos/show%s/branches" \
                % (u.netloc, re.sub(r'\.git$', '', u.path))
            json_data = json.loads(urlgrabber.urlopen(json_url).read())
            if isinstance(json_data, dict):
                return json_data.get('branches', {}).get(branch, 'UNKNOWN')
            else:
                logging.error("Unknown json data format: %s" % type(json_data))
        else:
            (rc, out) = call("git ls-remote %s refs/heads/%s" % (self.git_url, branch))
            if rc == 0:
                out = out.strip()  # yank off newline char
                return out.split()[0]
            else:
                logging.error("Unable to query repository: %s" % u)
    else:
        logging.error("Unhandled SCM format: %s" % u.scheme)
def runMythTuner():
    # Yes this is bad, but the bindings can't be trusted.
    role = runMythRole()
    num_encoders = -1
    if role == "StandAlone" or role == "Master backend" or role == "Master backend with Frontend":
        fencoder = False
        url = "http://localhost:6544"
        try:
            web_page = urlgrabber.urlopen(url).readlines()
        except:
            return 0
        num_encoders = 0
        for line in web_page:
            line = line.strip()
            if line == '<h2>Encoder status</h2>':
                fencoder = True
                continue
            if fencoder:
                #print line
                encoders = line.split('.<br />')
                for encoder in encoders:
                    if encoder.find("currently not connected") == -1 and encoder.startswith("Encoder"):
                        num_encoders = num_encoders + 1
                if line == '<div class="content">':
                    break
    return num_encoders
def setInstallData(self, anaconda):
    silvereye.InstallClass.setInstallData(self, anaconda)
    anaconda.id.firewall.portlist.extend(['53:tcp',
                                          '53:udp',
                                          '67:udp',
                                          '3260:tcp',
                                          '8443:tcp',
                                          '8772:tcp',
                                          '8773:tcp',
                                          '8774:tcp',
                                          '8888:tcp'])
    if flags.cmdline.has_key("eucaconf"):
        try:
            f = urlgrabber.urlopen(flags.cmdline["eucaconf"])
            eucaconf = open('/tmp/eucalyptus.conf', 'w')
            eucaconf.write(f.read())
            f.close()
            eucaconf.close()
        except urlgrabber.grabber.URLGrabError as e:
            if anaconda.intf:
                rc = anaconda.intf.messageWindow(_("Warning! eucalyptus.conf download failed"),
                                                 _("The following error was encountered while"
                                                   " downloading the eucalyptus.conf file:\n\n%s" % e),
                                                 type="custom", custom_icon="warning",
                                                 custom_buttons=[_("_Exit"), _("_Install anyway")])
                if not rc:
                    sys.exit(0)
            else:
                sys.exit(0)
def f(idx, q, r):
    path = "data%s" % (idx)
    os.makedirs(path)
    while True:
        item = q.get()
        if item.item_type == ITEM_QUIT:
            break
        count = 0
        localQueue = Queue()
        current = item.data
        while True:
            print current
            fo = urlopen(current)
            data = fo.read()
            name = "%s/%s" % (path, count)
            fw = open(name, "w")
            count = count + 1
            fw.write(data)
            fw.close()
            fo.close()
            p = MyHTMLParser()
            try:
                p.feed(data)
            except:
                pass
            for href in p.hrefs:
                print item.data, ": ", href
                # enqueue discovered links; without this the inner loop
                # would exit after the first page
                localQueue.put(href)
            try:
                current = localQueue.get_nowait()
            except:
                break
def get(self):
    """
    Convert feed source (be it opml, RSS etc) into a list of dictionaries
    containing titles and urls. This list of dictionaries can then be used
    to regenerate the user config file.
    """
    # using urlgrabber so it doesn't matter whether feed is a file or a url
    logger.debug("Opening feed: " + self.feed)
    fd = urlopen(self.feed)
    feed = {}
    # is this an OPML file?
    try:
        outlines = OPML.parse(fd).outlines
        logger.debug("Feed is OPML")
        for opmlfeed in outlines:
            feed = {}
            feed["title"] = opmlfeed["title"]
            feed["url"] = opmlfeed["xmlUrl"]
            self.feedlist.append(feed)
            logger.debug("Feed has been imported: %s - %s" % (feed["title"], feed["url"]))
    except Exception, e:
        feed = {}
        try:
            if self.title:
                feed["title"] = self.title
            else:
                outlines = feedparser.parse(self.feed)["feed"]
                feed["title"] = outlines.title
            feed["url"] = self.feed
            self.feedlist.append(feed)
            logger.debug("Feed has been imported: %s - %s" % (feed["title"], feed["url"]))
        except Exception, e:
            print "Feedparser exception:", e
            sys.exit(-1)
def preprocessKickstart(file):
    """Preprocess the kickstart file, given by the filename file.  This
       method is currently only useful for handling %ksappend lines,
       which need to be fetched before the real kickstart parser can be
       run.  Returns the location of the complete kickstart file.
    """
    try:
        fh = urlopen(file)
    except grabber.URLGrabError, e:
        raise IOError, formatErrorMsg(0, msg=_("Unable to open input kickstart file: %s") % e.strerror)
def findUrl(arglist):
    # arglist[0] is the urlList to search (set() removes duplicates)
    this_urllist = set(arglist[0])
    # arglist[1] is the FE to find
    this_FE = arglist[1]
    result = []
    for this_url in this_urllist:
        soup = BeautifulSoup(urlopen(this_url))
        if soup.find_all(text=re.compile(this_FE)):
            result.append(this_url)
    return result
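# Hedged usage sketch for findUrl(): both inputs travel in a single arglist
# (convenient for map()-style dispatch to worker pools); the search term is
# compiled into a regex, so any re syntax should work. The URLs below are
# illustrative only:
#
#   urls = ["http://example.com/a.html", "http://example.com/b.html"]
#   hits = findUrl([urls, "SomePattern"])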
def _read_locklist():
    locklist = []
    try:
        llfile = urlgrabber.urlopen(fileurl)
        for line in llfile.readlines():
            if line.startswith('#') or line.strip() == '':
                continue
            locklist.append(line.rstrip())
        llfile.close()
    except urlgrabber.grabber.URLGrabError, e:
        raise PluginYumExit('Unable to read version lock configuration: %s' % e)
    return locklist
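# Hedged note: fileurl is module-level state, presumably filled in from the
# plugin's configuration before _read_locklist() runs; any scheme urlgrabber
# understands should work, e.g. (hypothetical path):
#
#   fileurl = 'file:///etc/yum/pluginconf.d/versionlock.list'
#   locked = _read_locklist()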
def getHtml(url, showUrl=False):
    if showUrl:
        logging.info(url)
    try:
        page = urlopen(url)
        html = page.read()
        page.close()
        return html
    except URLGrabError:
        logging.error('exce url:' + url)
        return ""
def getHtml(url, showUrl=False):
    if showUrl:
        print(url)
    try:
        page = urlopen(url)
        html = page.read()
        page.close()
        return html
    except URLGrabError:
        print('exce url', url)
        return ""
def __parse_jeos_images(self):
    log = logging.getLogger('%s.%s' % (__name__, self.__class__.__name__))
    config_urls = self.configuration['jeos_config']

    # Expand directories from the config and url-ify files

    # Read inlist - replace directories with their contents
    nextlist = []
    for path in config_urls:
        if os.path.isdir(path):
            for filename in os.listdir(path):
                fullname = os.path.join(path, filename)
                if os.path.isfile(fullname):
                    nextlist.append(fullname)
        else:
            nextlist.append(path)

    # Read nextlist - replace files with file:// URLs
    finalist = []
    for path in nextlist:
        if os.path.isfile(path):
            finalist.append("file://" + path)
        else:
            finalist.append(path)

    for url in finalist:
        try:
            filehandle = urlopen(str(url))
            line = filehandle.readline().strip()
        except:
            log.warning("Failed to open JEOS URL (%s)" % url)
            continue
        line_number = 1
        while line:
            # Lines that start with '#' are a comment
            if line[0] == "#":
                pass
            # Lines that are zero length are whitespace
            elif len(line.split()) == 0:
                pass
            else:
                image_detail = line.split(":")
                if len(image_detail) >= 6:
                    self.__add_jeos_image(image_detail)
                else:
                    log.warning("Failed to parse line %d in JEOS config (%s):\n%s" % (line_number, url, line))
            line = filehandle.readline()
            line_number += 1
        filehandle.close()
def get(url):
    mod_url = url
    if url.find(PACKAGE_PREFIX) == 0:
        mod_url = url[len(PACKAGE_PREFIX):]
        pos = mod_url.find('/')
        if pos == -1:
            raise Exception("Could not parse package:// format into file:// format for " + url)
        package = mod_url[0:pos]
        mod_url = mod_url[pos:]
        package_path = rospack_find(package)
        mod_url = "file://" + package_path + mod_url
    return urlgrabber.urlopen(mod_url)
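# Hedged usage sketch: assuming PACKAGE_PREFIX = "package://" and a
# rospack_find() that returns the package root (the path below is
# hypothetical), a call like
#
#   get("package://mypkg/meshes/base.dae")
#
# rewrites the URL to "file:///opt/ros/share/mypkg/meshes/base.dae" before
# opening it through urlgrabber; URLs without the prefix pass through as-is.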
def findText(arglist):
    # arglist[0] is the urlList to search (set() removes duplicates)
    this_urllist = set(arglist[0])
    # arglist[1] is the FE to find
    this_FE = arglist[1]
    parents_visited = []
    result = []
    for this_url in this_urllist:
        soup = BeautifulSoup(urlopen(this_url))
        for this_tag in soup.find_all(text=re.compile(this_FE)):
            this_parent = this_tag.parent
            if this_parent in parents_visited:
                continue
            parents_visited.append(this_parent)
            this_text = ''
            for this_sibling in this_tag.parent.children:
                # .string is None for tags with child elements; guard
                # against a TypeError when concatenating
                this_text += this_sibling.string or ''
            result.append(this_text)
    return result
def sanity_check_repodata(myurl):
    """
    Sanity check the repodata for a given repository.
    Initial implementation by Seth Vidal.
    """
    myurl = str(myurl)
    tempdir = tempfile.mkdtemp()
    errorstrings = []
    if myurl[-1] != '/':
        myurl += '/'
    baseurl = myurl
    if not myurl.endswith('repodata/'):
        myurl += 'repodata/'
    else:
        baseurl = baseurl.replace('repodata/', '/')

    rf = myurl + 'repomd.xml'
    try:
        rm = urlgrabber.urlopen(rf)
        repomd = repoMDObject.RepoMD('foo', rm)
        for t in repomd.fileTypes():
            data = repomd.getData(t)
            base, href = data.location
            if base:
                loc = base + '/' + href
            else:
                loc = baseurl + href
            destfn = tempdir + '/' + os.path.basename(href)
            dest = urlgrabber.urlgrab(loc, destfn)
            ctype, known_csum = data.checksum
            csum = checksum(ctype, dest)
            if csum != known_csum:
                errorstrings.append("checksum: %s" % t)

            if href.find('xml') != -1:
                decompressed = decompress(dest)
                retcode = subprocess.call(['/usr/bin/xmllint', '--noout', decompressed])
                if retcode != 0:
                    errorstrings.append("failed xml read: %s" % t)
    except urlgrabber.grabber.URLGrabError, e:
        errorstrings.append('Error accessing repository %s' % e)
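# Minimal sketches of the checksum() and decompress() helpers that
# sanity_check_repodata() calls but does not define here -- hypothetical
# stand-ins, not the original implementations.
import gzip
import hashlib

def checksum(ctype, path):
    # ctype is the hash name recorded in repomd.xml, e.g. 'sha256'
    h = hashlib.new(ctype)
    with open(path, 'rb') as f:
        h.update(f.read())
    return h.hexdigest()

def decompress(path):
    # repodata XML files usually arrive gzip-compressed (*.xml.gz)
    if path.endswith('.gz'):
        out = path[:-3]
        with open(out, 'wb') as w:
            w.write(gzip.open(path, 'rb').read())
        return out
    return path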
def readKickstart(self, f, reset=True):
    """Process a kickstart file, given by the filename f."""
    if reset:
        self._reset()

    # an %include might not specify a full path.  if we don't try to figure
    # out what the path should have been, then we're unable to find it
    # requiring full path specification, though, sucks.  so let's make
    # the reading "smart" by keeping track of what the path is at each
    # include depth.
    if not os.path.exists(f):
        if self.currentdir.has_key(self._includeDepth - 1):
            if os.path.exists(os.path.join(self.currentdir[self._includeDepth - 1], f)):
                f = os.path.join(self.currentdir[self._includeDepth - 1], f)

    cd = os.path.dirname(f)
    if not cd.startswith("/"):
        cd = os.path.abspath(cd)
    self.currentdir[self._includeDepth] = cd

    try:
        fh = urlopen(f)
    except grabber.URLGrabError, e:
        raise IOError, formatErrorMsg(0, msg=_("Unable to open input kickstart file: %s") % e.strerror)
)
(options, args) = parser.parse_args()

if len(args) == 0:
    parser.error('One or more bundle keys are required')

progress_printer = ProgressPrint()
grabber = URLGrabber(prefix=options.gmb_url, progress_obj=progress_printer)

# Download the albums for each key
for key in args:
    # Get download page and grab all download URLs
    download_page_url = urljoin(options.gmb_url, '/download?key=%s' % key)
    download_page = urlopen(download_page_url)
    html = download_page.read()
    soup = BeautifulSoup(html, 'lxml')
    download_page.close()

    # Find all download links
    regex_download_link = re.compile(r'/download\?.*')
    download_links = [x['href'] for x in soup.find_all('a', href=regex_download_link)]
    album_urls = merge_album_links(download_links)
    print 'Going to download %d album(s)' % len(album_urls)

    for url in album_urls.values():
        # Switch to output directory as urlgrabber downloads to the current dir
        os.chdir(options.output_dir)
def test_urlopen(self):
    "module-level urlopen() function"
    fo = urlgrabber.urlopen('http://www.python.org')
    fo.close()
print 'Archmage could not extract ' + chmFile + ', continuing from next loop iteration'
nameStr = 'touch \'' + chmFilePath + '!!!_ArchiveExtractFail\''
os.system(nameStr)
continue  # skip this file

#mainPageFile=searchRes[1]['Main Page']
if (not searchRes[0]):
    print 'pychm searched for ISBN: searchRes empty for ' + chmFile + ', continuing to next loop iteration'
    nameStr = 'touch \'' + chmFilePath + '!!!_ISBNSearchFail\''
    os.system(nameStr)
    continue

try:
    mainPageFile = searchRes[1].items()[0][1]
    mainPagePath = tempPath + mainPageFile
    page = urlgrabber.urlopen(mainPagePath)
except Exception, e:
    # retry with the lower-cased filename
    mainPageFile = searchRes[1].items()[0][1]
    mainPageFile = mainPageFile.lower()
    mainPagePath = tempPath + mainPageFile
    page = urlgrabber.urlopen(mainPagePath)

soup = BeautifulSoup(page)
resSoup = soup.body.find(text=re.compile(r'ISBN'))
# here, check to see how many characters come after 'ISBN'
# if the number is more than 8, we likely are given the ISBN number in
# resSoup.string, and so we should grab it
ISBNStart = resSoup.string.find('ISBN')
ISBNEnd = ISBNStart + len('ISBN')
def index(req):
    rv = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>ECE 459: Programming for Performance</title>
<script type="text/javascript" src="/ui/alternate_rows.js"></script>
<link rel="stylesheet" type="text/css" href="alternating-rows.css" />
<style type="text/css">
td {vertical-align:top}
</style>
<style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UWblank.css");
</style>
<!-- TemplateBeginIf cond="collage" -->
<!-- <style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UWhome.css");
</style>-->
<!-- TemplateEndIf --><!-- TemplateBeginIf cond="rightnavmenu==false" -->
<style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UW2col.css");
</style>
<!-- TemplateEndIf --><!-- TemplateBeginIf cond="rightnavmenu" -->
<!-- <style type="text/css" media="screen">
@import url("http://www.uwaterloo.ca/css/UW3col.css");
</style> -->
<!-- TemplateEndIf -->
<!-- conditional comment added for IE 6 printing, IE 5.5 will not print this page very well -->
<!--[if IE 6]>
<style type="text/css" media="print">
@import url("http://www.uwaterloo.ca/css/UWprint.css");
</style>
<![endif]-->
<!-- this print will work in W3 Standard compliant browsers -->
<style type="text/css">
@import url("http://www.uwaterloo.ca/css/UWprint.css") print;
</style>
<style type="text/css" media="screen">
@import url("/css/ece.css");
</style>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<meta http-equiv="Content-Language" content="en-us" />
<!-- fill in below according to your site -->
<meta name="description" content="Programming for Performance" />
<meta name="keywords" content="programming parallelization" />
<meta name="author" content="Patrick Lam" />
<meta name="author" content="Design - Jesse Rodgers ([email protected])" />
<meta name="version" content="XHTML Version 1.0p1" />
<!-- optional regions -->
<!-- TemplateParam name="submenu" type="text" value="0" -->
<!-- TemplateParam name="collage" type="boolean" value="false" -->
<!-- TemplateParam name="rightnavmenu" type="boolean" value="false" -->
</head>
<body>
<!-- header -->
<div id="header">
<div id="clfbar">
<div id="uwlogo">
<a href="http://www.uwaterloo.ca">
<img src="http://www.uwaterloo.ca/images/template/uwlogo.gif" alt="Link to the University of Waterloo home page" width="105" height="70" border="0" />
</a>
</div>
<div id="searchbox">
<!-- form script is located on info, there are options though.
     Information is at http://web.uwaterloo.ca/clftemplate/search.html -->
<form action="http://info.uwaterloo.ca/clfscripts/uwsearch.php" method="post" name="search" target="_blank" id="search" title="search" dir="ltr" lang="en">
Search
<input type="radio" name="site" value="ece.uwaterloo.ca" checked="checked"/> in ECE
<input type="radio" name="site" value="uwaterloo.ca"/> all of UW
<input name="searchterm" type="text" id="searchterm" class="google" accesskey="s" tabindex="2" size="20" />
<input name="submit" type="submit" id="submit" class="google" tabindex="3" value="Search" />
</form>
</div>
<div id="wordmark">
<h1><a href="index.html">
<!-- replace title image with your own DO NOT FORGET ALT TAG!!! -->
<img src="/files/clear.gif" alt="Department of Electrical and Computer Engineering" width="400" height="30" border="0" />
</a></h1>
</div>
</div>
</div>
<span class="none"><a href="#content">Skip to the content of the web site.</a></span>
<!-- primary nav, add or delete links as you desire -->
<div id="primarynavarea">
<ul id="primarynav">
<li><a href="/p4p/"><b>Home</b></a></li>
<li><a href="/p4p/leaderboard/leaders.py">A3 Leaderboard</a></li>
<li><a href="/p4p/notes/">Lecture notes</a></li>
<li><a href="/p4p/exams.shtml">Exam information</a></li>
<li><a href="/p4p/files/assignment-01.pdf">Assignment 1 (PDF)</a></li>
<li><a href="/p4p/files/assignment-02.pdf">Assignment 2 (PDF)</a></li>
<li><a href="/p4p/files/assignment-03.pdf">Assignment 3 (PDF)</a></li>
<li><a href="/p4p/files/assignment-04.pdf">Assignment 4 (PDF)</a></li>
<!-- <li><a href="/p4p/a4notes.shtml">Assignment 4 notes</a></li>-->
<!-- TemplateEndIf -->
</ul>
</div>
<!-- content -->
<a name="content" id="content"></a>
<!-- TemplateBeginIf cond="collage == false" -->
<div id="contentbar">
<!-- this causes the warning about p tags on saving the template, just ignore -->
<!-- TemplateBeginEditable name="collage == false" -->
<h2> ECE459: Programming for Performance, W13 </h2>
<!-- TemplateEndEditable -->
</div>
<!-- TemplateEndIf -->
<!-- DO NOT FORGET ALT TAG!!!! -->
<!-- TemplateBeginIf cond="collage" -->
<!-- <div id="collage">
<img src="images/Collage3.jpg" alt=" " />
</div> -->
<!-- TemplateEndIf -->
<div id="primarycontarea">
<div id="primarycontent">
<!-- InstanceBeginEditable name="primarycontent" -->
<table>
"""
    lb = urlopen("http://ece459-1.uwaterloo.ca/leaders.csv")
    reader = csv.reader(lb)
    parity = 0
    for row in reader:
        parity = 1 - parity
        s = ""
        if (parity == 1):
            s = s + "background:#ddd"
        rv = rv + "<tr style='" + s + "'><td style='padding-right:1em'>" + row[0] + "</td><td style='text-align:right'>" + row[1] + "</td></tr>"
    lb.close()
    rv = rv + """</table>
</div>
</div>
<!-- footer -->
<div id="footer">
<div id="departmentaddress">
<a href="http://campaign.uwaterloo.ca"><img src="http://www.uwaterloo.ca/images/template/littlecampaignlogo.gif" alt="Campaign Waterloo" class="campaignlogo" /></a>
<p>
Patrick Lam <br />
Department of Electrical and Computer Engineering<br />
University of Waterloo<br />
200 University Avenue West<br />
Waterloo, Ontario, Canada N2L 3G1<br />
519 888 4567 ext. 36433 <br />
<br />
<a href="mailto:p.lam[at]ece.uwaterloo.ca">contact us</a> |
<a href="mailto:p.lam[at]ece.uwaterloo.ca">give us feedback</a> |
<a href="http://www.uwaterloo.ca">University of Waterloo Home Page </a><br />
</p>
</div>
</div>
</body>
<!-- InstanceEnd --></html>
"""
    return rv
def main():
    server = "http://plain.resources.ovirt.org"
    if len(sys.argv) != 3:
        print("Usage:")
        print(
            "{command} {job} {repo}".format(
                command=sys.argv[0],
                job=(
                    "http://jenkins.ovirt.org/view/Publishers/job/"
                    "publish_ovirt_rpms_nightly_3.5/73/console"
                ),
                repo="/repos/ovirt-3.5-pre",
            )
        )
        sys.exit(1)
    job = sys.argv[1]
    baseurl = sys.argv[2]

    u = urlgrabber.urlopen(job)
    content = u.read()
    u.close()

    required = []
    for line in content.splitlines():
        if line.find('SSH: put') != -1:
            filename = line[line.find('[')+1:line.find(']')]
            if filename not in required:
                required.append(filename)
                if filename.endswith('.tar.gz'):
                    required.append(filename + '.sig')

    print("------------------------------")
    print("Checking Jenkins jobs goodness")
    print("------------------------------")
    print("publisher job: %s" % job)
    print(
        "repository: {server}{baseurl}\n\n".format(
            server=server,
            baseurl=baseurl,
        )
    )

    m = re.compile(r'^(?P<package>([a-zA-Z0-9]+\-)+[0-9\.]+[_0-9a-zA-Z\.]*)')
    for filename in required:
        if filename.endswith('.src.rpm'):
            package = m.match(filename)
            if package is not None:
                tarball = package.groupdict()['package'] + ".tar.gz"
                if tarball not in required:
                    print(
                        (
                            "missing sources : {tarball}\n"
                            "for rpm: {rpm}\n"
                            "found:\n"
                        ).format(
                            tarball=tarball,
                            rpm=filename,
                        )
                    )
                    for x in required:
                        if (
                            x.startswith(package.groupdict()['package'])
                            and x.endswith('tar.gz')
                        ):
                            print(x)

    not_required = []
    queue = collections.deque()
    queue.append(
        "{server}{baseurl}".format(
            server=server,
            baseurl=baseurl,
        )
    )
    m = re.compile('href="([^"]*)"')
    print(
        "\n\n\n"
        "-------------------------------------------------------\n"
        "Checking expected repository content from publisher job\n"
        "-------------------------------------------------------\n"
    )
    while queue:
        newitem = queue.popleft()
        print("processing %s" % newitem)
        u = urlgrabber.urlopen(newitem)
        root = u.read()
        u.close()
        for x in m.findall(root):
            if not (
                x.startswith('?')
                or x.startswith('/')
            ):
                if (
                    x.endswith('.rpm')
                    or x.endswith('.iso')
                    or x.endswith('.exe')
                    or x.endswith('.gz')
                    or x.endswith('.sig')
                    or x.endswith('.bz2')
                    or x.endswith('.xml')
                    or x.endswith('.zip')
                ):
                    if x in required:
                        required.remove(x)
                    else:
                        not_required.append(x)
                else:
                    queue.append(
                        "{baseurl}/{x}".format(
                            baseurl=newitem,
                            x=x,
                        )
                    )

    print(
        "The following packages were in the publisher job and are "
        "missing in the repo:"
    )
    for x in required:
        print(x)
    # comment out the next line to make exceptions non-fatal
    from exception import initExceptionHandling
    anaconda.mehConfig = initExceptionHandling(anaconda)

    # add our own additional signal handlers
    signal.signal(signal.SIGUSR2, lambda signum, frame: anaconda.dumpState())

    anaconda.setDispatch()

    # download and run Dogtail script
    if opts.dogtail:
        try:
            import urlgrabber
            try:
                fr = urlgrabber.urlopen(opts.dogtail)
            except urlgrabber.grabber.URLGrabError, e:
                log.error("Could not retrieve Dogtail script from %s.\nError was\n%s" % (opts.dogtail, e))
                fr = None
            if fr:
                (fw, testcase) = mkstemp(prefix='testcase.py.', dir='/tmp')
                os.write(fw, fr.read())
                fr.close()
                os.close(fw)

                # download completed, run the test
                if not os.fork():
                    # we are in the child
                    os.chmod(testcase, 0o755)
                    os.execv(testcase, [testcase])
def open(self, unused_mode="r"):
    try:
        self.fd = urlgrabber.urlopen(self.source)
    except urlgrabber.grabber.URLGrabError, e:
        raise IOError, str(e)
#use mimms?
from BeautifulSoup import BeautifulSoup
import re
import urlgrabber
import urllib
import urllib2

MSRpageURL = "http://www.researchchannel.org/prog/displayinst.aspx?fID=880&pID=480"
MSRbaseURL = "http://content.digitalwell.washington.edu/msr/external_release_talks_12_05_2005/"
UWCSE2007pageURL = "http://www.researchchannel.org/prog/displayseries.aspx?path=1&fID=2318&pID=497"
UWCSE2008pageURL = "http://www.researchchannel.org/prog/displayseries.aspx?path=1&fID=4946&pID=497"

page = urlgrabber.urlopen(MSRpageURL)
soup = BeautifulSoup(page)
lecResList = soup.findAll('a', 'bluelink')
for lecRes in lecResList:
    lecTitle = lecRes.contents[0]
    print lecTitle
    lecInfoURL = lecRes.attrs[2][1]
    lecPage = urlgrabber.urlopen(lecInfoURL)
    lecSoup = BeautifulSoup(lecPage)
    try:
        lecDate = lecSoup.findAll('span', {'id': 'mediaGroupProductionDate'})[0].contents[0]
    except Exception, e:
        print e
titleCln = re.sub(r'Book #[\d]', '', titleCln)
authorCln = author.rstrip().strip('"').replace('Author: ', '')
authorCln = authorCln.replace('/', ' ')
titleQry = titleCln.replace(' ', '+')
authorQry = authorCln.replace(' ', '+')
queryStr = titleQry + '+' + authorQry
titleSearchURL = 'http://books.google.com/books?client=firefox-a&um=1&q=' + titleQry + '&btnG=Search+Books'
advSearchURL = 'http://books.google.com/books?as_q=&num=10&client=firefox-a&btnG=Google+Search&as_epq=&as_oq=&as_eq=&as_libcat=0&as_brr=0&lr=&as_vt=' + titleQry + '&as_auth=' + authorQry + '&as_pub=&as_sub=&as_drrb=c&as_miny=&as_maxy=&as_isbn='
basSearchURL = 'http://books.google.com/books?client=firefox-a&um=1&q=' + queryStr + '&btnG=Search+Books'

searchURL = advSearchURL
searchResPage = urlgrabber.urlopen(searchURL)
searchResSoup = BeautifulSoup(searchResPage)
bookLinkList = searchResSoup.find('h2', 'resbdy')
if not bookLinkList:
    searchURL = basSearchURL
    searchResPage = urlgrabber.urlopen(searchURL)
    searchResSoup = BeautifulSoup(searchResPage)
    bookLinkList = searchResSoup.find('h2', 'resbdy')
    if not bookLinkList:
        searchURL = titleSearchURL
        searchResPage = urlgrabber.urlopen(searchURL)
        searchResSoup = BeautifulSoup(searchResPage)
import urllib2
import string
import commands
from urlgrabber import urlopen
from bs4 import BeautifulSoup
import subprocess

mac_id = commands.getstatusoutput(
    "ethtool -P eth0 | awk -F \' \' \'{print $3}\'")
url = str('http://vps.sensorfaucets.com/stock_db/regester.php?mac='
          ) + mac_id[1] + str('&type=HUB')
html = urlopen(url).read()
parsed_html = BeautifulSoup(html, "html.parser")
print parsed_html.get_text()
#html.close()

url = str(
    'http://vps.sensorfaucets.com/stock_db/checking.php?mac=') + mac_id[1]
while 1:
    htmlc = urlopen(url).read()
    parsed_htmlc = BeautifulSoup(htmlc, "html.parser")
    s = parsed_htmlc.get_text()
    p = s[0:5] + s[26:50]
    print p
def get(url):
    return urlgrabber.urlopen(get_filename(url))
def test_urlopen(self):
    "module-level urlopen() function"
    fo = urlgrabber.urlopen('http://abat.au.example.com')
    fo.close()