req = urllib.request.Request(url + "/" + href, credentials, {"Accept-Encoding": "gzip"}) try: f = common.retrieve_tmpfile(req) except urllib.error.HTTPError as e: if e.code == 403: print("WARNING: %s, continuing..." % e, file=sys.stderr) warnings += 1 continue raise if isgzip(f): try: g = gzip.GzipFile(fileobj=f, mode="r") common.sendfile_disk(g, path) g.close() except Exception as e: print("WARNING: %s, continuing..." % e, file=sys.stderr) warnings += 1 continue else: common.sendfile_disk(f, path) f.close() common.mkro(path) mailindex.index(".", _list, path) attachments.extract(path) thunderbird.link(path)
break if not path in db or not os.path.isfile(path): common.mkdirs(os.path.split(path)[0]) try: f = common.retrieve_tmpfile(url + "/" + href, credentials) except urllib2.HTTPError, e: if e.code == 403: print >>sys.stderr, "WARNING: %s, continuing..." % e warnings += 1 continue raise if isgzip(f): g = gzip.GzipFile(fileobj = f, mode = "r") common.sendfile_disk(g, path) g.close() else: common.sendfile_disk(f, path) f.close() common.mkro(path) mailindex.index(".", _list, path) attachments.extract(path) thunderbird.link(path) if not (tm.tm_year == now.tm_year and tm.tm_mon == now.tm_mon): db.add(path) with open(".sync-done", "w") as f:
return bytes == "%PDF" if __name__ == "__main__": global config config = common.load_config() print >>sys.stderr, "Utility needs update since relaunch of www.redhat.com, feel free to submit patches..." sys.exit(1) common.mkdirs(config["references-base"]) os.chdir(config["references-base"]) lock = common.Lock(".lock") common.retrieve("http://www.redhat.com/customersuccess/", "index.html") common.mkro("index.html") toc = lxml.html.soupparser.parse("index.html").getroot() for url in toc.xpath("//a[substring-after(@href, '.') = 'pdf']/../../.."): url = copy.deepcopy(url) title = url.xpath("//h4//a/text()")[0].replace("/", "_") href = url.xpath("//a[substring-after(@href, '.') = 'pdf']/@href")[0] print >>sys.stderr, title f = common.retrieve_tmpfile("http://www.redhat.com" + href) if ispdf(f): # a few links on /customersuccess are currently broken HTML files common.sendfile_disk(f, title + ".pdf") common.mkro(title + ".pdf") f.close()