def chunk_get(process_no, dest_dir, file_url, file_size):
    """Download one fifth of ``file_url`` (chunk ``process_no``, 0-4) from a
    local FTP server into ``dest_dir``, resuming a partial ``.part<n>`` file
    if one already exists.

    process_no -- chunk index 0..4; chunk 4 absorbs the size remainder
    dest_dir   -- destination directory prefix (must end with a separator)
    file_url   -- remote path relative to the FTP root
    file_size  -- total size of the remote file in bytes
    """
    file_name = file_url.split('/')[-1]
    url = "ftp://localhost:2121/" + file_url
    file_path = dest_dir + file_name + ".part" + str(process_no)
    # Absolute byte range of this chunk within the remote file.
    chunk_start = process_no * file_size / 5
    if process_no == 4:
        chunk_end = file_size
    else:
        # BUGFIX: the resume path previously used ``process_no * file_size / 5``
        # (the chunk *start*) as the end boundary, so resumes never extended
        # the chunk correctly; the end is the next chunk's start.
        chunk_end = (process_no + 1) * file_size / 5
    g = URLGrabber(reget="simple")
    if os.path.isfile(file_path):
        # Resume: the part file's size is how many chunk bytes we already have.
        done = os.stat(file_path).st_size
        if chunk_start + done < chunk_end:
            file_temp_path = file_path + ".tmp"
            # BUGFIX: the remote range must be offset by chunk_start; the old
            # code passed the partial size as if it were an absolute offset.
            g.urlgrab(url, filename=file_temp_path,
                      range=(chunk_start + done, chunk_end), retry=0)
            # Append the newly fetched tail onto the existing part file.
            tmp = open(file_temp_path, 'rb')
            out = open(file_path, 'ab')
            out.write(tmp.read())
            out.close()
            tmp.close()
            os.remove(file_temp_path)
    else:
        # No part file yet: fetch the whole chunk in one go.
        g.urlgrab(url, filename=file_path,
                  range=(chunk_start, chunk_end), retry=0)
def threaded_download(single_download, logfile=None): """ This method initiate with an URL as a thread from a threadPool. But on its own, it is not thread-safe. It has to be managed to the caller Download location: <Current Directory> single_download --> complete download link logfile --> use default logfile if not supplied with. """ # registering CTRL+C as UserInterrupt # signal.signal(signal.SIGINT, signal.SIG_IGN) response = "Not Downloaded" try: download_size = int((u2.urlopen(single_download) ).info().getheaders("Content-Length")[0]) print "Starting: " + str( single_download) + " :: Download target's size: %s KB" % ( download_size / 1024) g = URLGrabber(reget='simple', retry=default_retry, timeout=default_timeout, proxies=default_proxy) response = g.urlgrab(single_download) print "Completed: " + response except URLGrabError as ue: print str(ue) + "\nskipping: " + single_download else: return response # response --> downloaded file's name, if download is successful
def test_make_callback(self):
    """grabber.URLGrabber._make_callback() tests"""
    def callback(e):
        pass
    bundled = (callback, ('stuff'), {'some': 'dict'})
    grabber_obj = URLGrabber()
    # A bare callable is normalized to the (cb, args, kwargs) triple.
    self.assertEquals(grabber_obj._make_callback(callback),
                      (callback, (), {}))
    # An already-bundled triple passes through untouched.
    self.assertEquals(grabber_obj._make_callback(bundled), bundled)
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a GPG key file.

    @param keyurl: url to the key to retrieve
    @param repo: optional repo whose proxy/bandwidth settings are used

    Raises ChannelException when the download fails.
    NOTE(review): as written the downloaded ``rawkey`` is never returned or
    parsed here -- this looks like a truncated copy of the fuller variant
    elsewhere in the file; confirm.
    """
    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use its proxy etc. configuration.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    # BUGFIX(consistency): use py3-compatible ``as e`` syntax, matching the
    # sibling implementation of this method in the same file; also dropped
    # the unused ``key_installed`` local.
    except urlgrabber.grabber.URLGrabError as e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))
def setUp(self):
    # Spin up a minimal HTTP server on localhost:2000 that answers every
    # request with just the status line held in self.reply (initially
    # "503 Busy"), so MirrorGroup failure handling can be exercised without
    # a real mirror.
    def server():
        import socket
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(('localhost', 2000))
        s.listen(1)
        while 1:
            c, a = s.accept()
            # Drain the request until the blank line that ends the headers.
            while not c.recv(4096).endswith('\r\n\r\n'):
                pass
            # Only a status line is sent -- the test needs no headers/body.
            c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
            c.close()
    import thread
    self.reply = 503, "Busy"
    # Python 2 low-level thread API; the server thread has no shutdown
    # handshake and runs for the life of the process.
    thread.start_new_thread(server, ())

    def failure(obj):
        # Record the HTTP error code of the failed mirror for assertions.
        self.code = getattr(obj.exception, 'code', None)
        return {}
    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:2000/'],
                          failure_callback=failure)
def update_categories(username, subscriptions): g = URLGrabber() folder = BASE + '/' + username if not os.path.exists(folder): os.mkdir(folder) cats = get_categories(username) visited = set() for sub in subscriptions: if sub.name in visited: continue elif sub.name in cats: del cats[sub.name] visited.add(sub.name) continue else: print 'Downloading thumbnail for %s/%s'%(sub.name, sub.dname) ft = sub.thumbnail[-3:] nf = '%s/%s%s%s.%s'%(folder, sub.name, SPLITTER, sub.dname, ft) g.urlgrab(sub.thumbnail, filename=nf) for sub in cats: print 'Removing thumbnail for %s'%sub if cats[sub] is None: old_fn = '%s/%s*'%(folder, sub) else: old_fn = '%s/%s/%s*'%(folder, cats[sub], sub) for fl in glob.glob(old_fn): print '\t', fl os.remove(fl)
def __init__(self, config=None):
    """Build a grabber wrapper, preferring urlgrabber when available.

    config -- optional dict; key 'proxy' selects an http proxy URL.
    """
    # BUGFIX: mutable default argument replaced with None sentinel.
    if config is None:
        config = {}
    self.gotLibUrlGrabber = False
    try:
        from urlgrabber.grabber import URLGrabber
    except ImportError:
        writeError('This script is better with URLBrabber.')
        writeError('See http://linux.duke.edu/projects/urlgrabber/')
        return
    # BUGFIX: the flag was never set True after a successful import, so the
    # urlgrabber path below (and in getWebFile) was unreachable.
    self.gotLibUrlGrabber = True
    if 'proxy' in config:
        writeInfo("URLGrabberWithProxy : %s" % config['proxy'])
        self.g = URLGrabber(proxies={'http': config['proxy']})
    else:
        writeInfo("URLGrabbersansProxy")
        self.g = URLGrabber()
def setUp(self):
    self.g = URLGrabber()
    # Bad mirrors first so failover order is deterministic.
    mirrors = bad_mirrors + good_mirrors
    fullmirrors = [base_mirror_url + name + '/' for name in mirrors]
    if hasattr(urlgrabber.grabber, '_TH'):
        # test assumes mirrors are not re-ordered
        urlgrabber.grabber._TH.hosts.clear()
    self.mg = MirrorGroup(self.g, fullmirrors)
def setUp(self):
    """Skip the FTP proxy tests unless a proxy is configured and the
    reference URL is reachable."""
    self.url = ref_ftp
    if not self.have_proxy():
        self.skip()
    try:
        urllib.request.urlopen(self.url).close()
    except IOError:
        self.skip()
    self.g = URLGrabber()
def mediaHandler(self, *args, **kwargs):
    """Grab kwargs['relative'] from self.tree into kwargs['local'] and
    return the local path."""
    relative = kwargs["relative"]
    grabber = URLGrabber(checkfunc=kwargs["checkfunc"])
    source = "%s/%s" % (self.tree, relative)
    grabber.urlgrab(source, kwargs["local"], text=kwargs["text"],
                    range=kwargs["range"], copy_local=1)
    return kwargs["local"]
def _retrievePublicKey(self, keyurl, repo=None):
    """
    Retrieve a key file

    @param keyurl: url to the key to retrieve
    @param repo: optional repo whose proxy/throttle settings are used

    Returns a list of dicts with all the keyinfo
    """
    # Go get the GPG key from the given URL
    try:
        url = yum.misc.to_utf8(keyurl)
        if repo is None:
            rawkey = urlgrabber.urlread(url, limit=9999)
        else:
            # If we have a repo, use the proxy etc. configuration for it.
            # In theory we have a global proxy config too, but meh...
            # external callers should just update.
            ug = URLGrabber(bandwidth=repo.bandwidth,
                            retry=repo.retries,
                            throttle=repo.throttle,
                            progress_obj=repo.callback,
                            proxies=repo.proxy_dict)
            ug.opts.user_agent = default_grabber.opts.user_agent
            rawkey = ug.urlread(url, text=repo.id + "/gpgkey")
    except urlgrabber.grabber.URLGrabError as e:
        raise ChannelException('GPG key retrieval failed: ' +
                               yum.i18n.to_unicode(str(e)))
    # Parse the key
    try:
        keys_info = yum.misc.getgpgkeyinfo(rawkey, multiple=True)
    except ValueError as err:
        raise ChannelException(
            'GPG key information retrieval failed: {}'.format(err))
    except Exception as err:
        raise ChannelException(
            'Unhandled GPG key failure occurred: {}'.format(err))
    keys = []
    for keyinfo in keys_info:
        thiskey = {}
        for info in ('keyid', 'timestamp', 'userid',
                     'fingerprint', 'raw_key'):
            # BUGFIX: dict.has_key() does not exist on Python 3 while the
            # rest of this function already uses py3 syntax; use ``in``.
            if info not in keyinfo:
                raise ChannelException(
                    'GPG key parsing failed: key does not have value %s'
                    % info)
            thiskey[info] = keyinfo[info]
        # Normalize the 64-bit key id to an upper-case 16-digit hex string.
        thiskey['keyid'] = str(
            "%016x" % (thiskey['keyid'] & 0xffffffffffffffff)).upper()
        thiskey['hexkeyid'] = yum.misc.keyIdToRPMVer(
            keyinfo['keyid']).upper()
        keys.append(thiskey)
    return keys
def test_parse_url_with_prefix(self):
    """grabber.URLParser.parse() with opts.prefix"""
    base = 'http://foo.com/dir'
    filename = 'bar/baz'
    target = base + '/' + filename
    # The prefix may be given with or without a trailing slash;
    # both must join to the same URL.
    for prefix in (base, base + '/'):
        g = URLGrabber(prefix=prefix)
        url, parts = g.opts.urlparser.parse(filename, g.opts)
        self.assertEquals(url, target)
def _getTreeInfo(self, url, proxy_url, sslverify):
    """ Retrieve treeinfo and return the path to the local file.

        :param url: url of the repo
        :type url: string
        :param proxy_url: Optional full proxy URL of or ""
        :type proxy_url: string
        :param sslverify: True if SSL certificate should be varified
        :type sslverify: bool
        :returns: Path to retrieved .treeinfo file or None
        :rtype: string or None
    """
    if not url:
        return None
    log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
              url, proxy_url, sslverify)

    ugopts = {"ssl_verify_peer": sslverify,
              "ssl_verify_host": sslverify}

    # Optional proxy; a malformed proxy string is logged and ignored.
    proxies = {}
    if proxy_url:
        try:
            proxy = ProxyString(proxy_url)
            proxies = {"http": proxy.url, "https": proxy.url}
        except ProxyStringError as e:
            log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                     proxy_url, e)

    ug = URLGrabber()
    # Try the hidden ".treeinfo" name first, then the visible "treeinfo".
    try:
        treeinfo = ug.urlgrab("%s/.treeinfo" % url, "/tmp/.treeinfo",
                              copy_local=True, proxies=proxies, **ugopts)
    except URLGrabError:
        try:
            treeinfo = ug.urlgrab("%s/treeinfo" % url, "/tmp/.treeinfo",
                                  copy_local=True, proxies=proxies, **ugopts)
        except URLGrabError as e:
            log.info("Error downloading treeinfo: %s", e)
            treeinfo = None
    return treeinfo
def download(url, filename=None, associated_task=None, web_proxy=None):
    """Download *url* with urlgrabber, resuming partial downloads.

    url             -- source URL
    filename        -- target file or directory; when a directory, the URL's
                       basename is appended; None lets urlgrabber choose
    associated_task -- optional task object used for progress reporting
    web_proxy       -- optional http proxy URL

    Returns the path of the downloaded file.
    """
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(
            url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy = {'http': web_proxy}
    # Renamed the local (was ``urlgrabber``) so it no longer shadows the
    # urlgrabber module name.
    grabber = URLGrabber(reget='simple', proxies=web_proxy,
                         progress_obj=progress_obj)
    # BUGFIX: os.path.isdir(None) raises TypeError; only treat *filename*
    # as a directory when it was actually supplied.
    if filename and os.path.isdir(filename):
        filename = os.path.join(filename, os.path.basename(url))
    filename = grabber.urlgrab(url, filename=filename)
    return filename
def testKeywordArgs(self):
    """grabber.URLGrabber.__init__() **kwargs handling.

    This is a simple test that just passes some arbitrary values into the
    URLGrabber constructor and checks that they've been set properly.
    """
    opener = urllib2.OpenerDirector()
    # Table of (option name, value) pairs fed to the constructor.
    expected = [('progress_obj', self.meter),
                ('throttle', 0.9),
                ('bandwidth', 20),
                ('retry', 20),
                ('retrycodes', [5, 6, 7]),
                ('copy_local', 1),
                ('close_connection', 1),
                ('user_agent', 'test ua/1.0'),
                ('proxies', {'http': 'http://www.proxy.com:9090'}),
                ('opener', opener)]
    g = URLGrabber(**dict(expected))
    for attr, value in expected:
        self.assertEquals(getattr(g.opts, attr), value)
    # A delegating options object overrides only what it is given and
    # falls back to the delegate for everything else.
    nopts = grabber.URLGrabberOptions(delegate=g.opts, throttle=0.5,
                                      copy_local=0)
    overrides = {'throttle': 0.5, 'copy_local': 0}
    for attr, value in expected:
        self.assertEquals(getattr(nopts, attr), overrides.get(attr, value))
    # Setting an attribute directly masks the delegate's value.
    nopts.opener = None
    self.assertEquals(nopts.opener, None)
def validConnection (szURL, szVersion, bsupgrade): try: upgrade_tarball = "nsg-upgrade.tar.gz" baseURL = re.sub(r'/[^/]+$', '', szURL) bootstrap_url = baseURL + "/nsg-upgrade/" + upgrade_tarball grabber = URLGrabber(timeout=30.0) bsupgrade = grabber.urlgrab( bootstrap_url, "/tmp/" + upgrade_tarball ) except URLGrabError, e: if e[0] == 4: aszHost = szURL.split("/") return "ERROR Connection check failed: Host %s is not responding" % (aszHost[2]) elif e[0] == 14: return "ERROR Connection check failed: nsg-upgrade directory was not found in url %s" % szURL else: return "ERROR Checking Connection: %d %s" % (e[0] , e[1]) return "ERROR " + e.strerror
def _test_url(self, urllist):
    """Parse urllist[0] (optionally quoted per urllist[3]) and assert the
    resulting URL and parts tuple match urllist[1] / urllist[2]."""
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote
    (url, parts) = g.opts.urlparser.parse(urllist[0], g.opts)
    # BUGFIX(cleanup): removed the dead debug branch behind ``if 1:``; its
    # ``' ' + urllist[2]`` would have raised TypeError on the tuple anyway.
    self.assertEquals(url, urllist[1])
    self.assertEquals(parts, urllist[2])
def download_file(url, dirname):
    """
    Download @url and save to @dirname.
    @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = "%s/%s" % (dirname, os.path.basename(url))
    # Refuse to clobber an existing download.
    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!"
                        % filename)
    grabber = URLGrabber(reget=None)
    return grabber.urlgrab(url, filename)
def run(self):
    """Worker entry point: download self.url to self.file, driving the
    app's progress UI and writing the JSON info file on success."""
    # Remove any stale copy of the target file; clear the read-only bit
    # first so os.remove cannot fail on it.
    if os.path.isfile(self.file):
        os.chmod(self.file, stat.S_IWUSR)
        os.remove(self.file)
    ##Init url/path pointers
    #response = urllib2.urlopen(self.url)
    #total_size = response.info().getheader('Content-Length').strip()
    #self.total_size = int(total_size)
    #freespace
    #freespace = get_free_space(self.app, path)
    #check if enough freespace
    #if self.freespace < total_size and self.freespace != 0:
    #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
    #    self.active = False
    #    return
    # Show the download panel (control id 4000) while the transfer runs.
    self.app.gui.SetVisible(4000, True)
    progress = TextMeter(self.app)
    try:
        Log(self.app, 'Download started')
        # reget='simple' resumes a partial download if the file exists.
        g = URLGrabber(reget='simple')
        g.urlgrab(self.url, filename=self.file, reget='simple',
                  progress_obj=progress, text=self.filename)
        # Persist the item metadata next to the download as JSON.
        json_dumps(self.infodata, self.infopath)
        self.app.gui.ShowDialogNotification('Download Complete')
    except:
        # Deliberate catch-all: log the traceback and show a generic error
        # in the UI instead of letting the thread die.
        Log(self.app, traceback.format_exc())
        self.app.gui.ShowDialogNotification('Error during download')
    self.app.gui.SetVisible(4000, False)
    self.active = False
    Log(self.app, 'Download finished')
def setUp(self):
    # start the server
    self.exit = False

    def server():
        # Minimal six-compatible (py2/py3) HTTP server on LOCALPORT that
        # answers every request with the canned status line in self.reply
        # and, when self.content is set, a Content-Length header plus body.
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(LOCALPORT)
        s.listen(1)
        while 1:
            c, a = s.accept()
            # A connection made after self.exit is set is the shutdown
            # signal from the test teardown.
            if self.exit:
                c.close()
                break
            # Drain the request headers; py3 sockets speak bytes.
            ending_compat = '\r\n\r\n' if not six.PY3 else b'\r\n\r\n'
            while not c.recv(4096).endswith(ending_compat):
                pass
            http_compat = 'HTTP/1.1 %d %s\r\n' % self.reply
            c.sendall(http_compat if not six.PY3 else http_compat.
                      encode('utf-8'))
            if self.content is not None:
                cont_length_compat = 'Content-Length: %d\r\n\r\n' % len(
                    self.content)
                c.sendall(cont_length_compat if not six.PY3 else
                          cont_length_compat.encode('utf-8'))
                c.sendall(self.content if not six.PY3 else self.content.
                          encode('utf-8'))
            c.close()
        s.close()
    self.exit = False
    thread.start_new_thread(server, ())

    # create grabber and mirror group objects
    def failure(obj):
        # Record the failed mirror's HTTP error code for assertions.
        self.code = getattr(obj.exception, 'code', None)
        return {}
    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
                          failure_callback=failure)
def setUp(self):
    # start the server
    self.exit = False
    # Hook tests can replace self.process to inspect the raw request bytes.
    self.process = lambda data: None
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # Bind to an ephemeral port so parallel test runs cannot collide.
    s.bind(('localhost', 0))
    s.listen(1)
    self.port = s.getsockname()[1]

    def server():
        # One-request-at-a-time HTTP stub: answers with the canned status
        # line in self.reply and, when set, a Content-Length header plus
        # body from self.content.
        while True:
            c, a = s.accept()
            # A connection made after self.exit is set is the shutdown
            # signal from teardown.
            if self.exit:
                c.close()
                break
            data = b''
            # NOTE(review): each recv() *replaces* ``data`` instead of
            # appending, so a header block split across packets would loop
            # forever -- fine for loopback-sized test requests, but confirm.
            while not data.endswith(b'\r\n\r\n'):
                data = c.recv(4096)
            self.process(data)
            c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply)
            if self.content is not None:
                c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content))
                c.sendall(self.content)
            c.close()
        s.close()
    self.exit = False
    self.thread = threading.Thread(target=server)
    self.thread.start()

    # create grabber and mirror group objects
    def failure(obj):
        # Record the failed mirror's HTTP error code for assertions.
        self.code = getattr(obj.exception, 'code', None)
        return {}
    self.g = URLGrabber()
    self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port],
                          failure_callback=failure)
def _preInstall_url_image(self):
    """ Download the installation image to self.image_path via urlgrabber,
        reporting progress and logging any failure.
    """
    # Progress callback wired into the installer UI.
    progress = URLGrabberProgress()
    verify = not self.data.method.noverifyssl
    ugopts = {
        "ssl_verify_peer": verify,
        "ssl_verify_host": verify,
        "proxies": self._proxies,
        "progress_obj": progress,
        "copy_local": True
    }
    error = None
    try:
        URLGrabber().urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        # The grab can "succeed" without producing a file; flag that too.
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)
def _test_url(self, urllist):
    """Parse urllist[0] (optionally quoted per urllist[3]) and assert the
    resulting URL and parts match urllist[1] / urllist[2].

    Everything is UTF-8 encoded to bytes first, matching the byte-oriented
    parser interface under test.
    """
    g = URLGrabber()
    try:
        quote = urllist[3]
    except IndexError:
        quote = None
    g.opts.quote = quote
    url = urllist[0].encode('utf8')
    expected_url = urllist[1].encode('utf8')
    expected_parts = tuple(part.encode('utf8') for part in urllist[2])
    (url, parts) = g.opts.urlparser.parse(url, g.opts)
    # BUGFIX(cleanup): removed the dead debug branch behind ``if 1:``; it
    # compared the bytes results against the un-encoded str values and its
    # ``' ' + urllist[2]`` would have raised TypeError regardless.
    self.assertEqual(url, expected_url)
    self.assertEqual(parts, expected_parts)
def setUp(self):
    """Skip the HTTP proxy tests unless a proxy is configured."""
    self.url = ref_http
    if not self.have_proxy():
        self.skip()
    self.g = URLGrabber()
#!/usr/bin/python3
from urlgrabber import urlopen
from urlgrabber.grabber import URLGrabber
from urlgrabber.mirror import MirrorGroup

# Smoke-test the plain urlopen interface against a local server.
response = urlopen('http://localhost')
print(response.read())

# Then exercise mirror failover: the first mirror is tried first, the
# second acts as the fallback.
grabber = URLGrabber()
mirrors = MirrorGroup(grabber, ['http://localhost2/', 'http://me.myself/'])
mirrors.urlgrab('test.txt')
def __init__(self, maxthreads=5, **kwargs):
    """Create a threaded download manager.

    maxthreads -- maximum number of concurrent worker threads
    kwargs     -- passed straight through to the URLGrabber constructor
    """
    # BUGFIX: the maxthreads parameter was ignored and the limit was
    # hard-coded to 5.
    self.maxthreads = maxthreads
    self.grabber = URLGrabber(**kwargs)
    self.queue = []
    self.threads = []
    self.sem = Semaphore()
def setUp(self):
    self.g = URLGrabber()
    # Only the known-good mirrors, each as a full base URL with a
    # trailing slash.
    self.fullmirrors = [base_mirror_url + name + '/'
                        for name in good_mirrors]
def setUp(self):
    self.g = URLGrabber()
    # Build full mirror base URLs and wrap them in a MirrorGroup.
    mirror_urls = [base_mirror_url + name + '/' for name in good_mirrors]
    self.mg = MirrorGroup(self.g, mirror_urls)
def preInstall(self, *args, **kwargs):
    """ Download image and loopback mount it.

        This is called after partitioning is setup, we now have space to
        grab the image. Download it to ROOT_PATH and provide feedback
        during the download (using urlgrabber callback).
    """
    # Setup urlgrabber and call back to download image to ROOT_PATH
    progress = URLGrabberProgress()
    ugopts = {"ssl_verify_peer": not self.data.method.noverifyssl,
              "ssl_verify_host": not self.data.method.noverifyssl,
              "proxies": self._proxies,
              "progress_obj": progress,
              "copy_local": True}
    error = None
    try:
        ug = URLGrabber()
        ug.urlgrab(self.data.method.url, self.image_path, **ugopts)
    except URLGrabError as e:
        log.error("Error downloading liveimg: %s", e)
        error = e
    else:
        # The grab can "succeed" without producing a file; treat that as
        # an error too.
        if not os.path.exists(self.image_path):
            error = "Failed to download %s, file doesn't exist" % self.data.method.url
            log.error(error)
    if error:
        # Defer to the installer's error handler: it decides whether the
        # failure is fatal.
        exn = PayloadInstallError(str(error))
        if errorHandler.cb(exn) == ERROR_RAISE:
            raise exn
    # Used to make install progress % look correct
    self._adj_size = os.stat(self.image_path)[stat.ST_SIZE]
    if self.data.method.checksum:
        # Verify the image against the kickstart-provided sha256, reading
        # in 1 MiB chunks to bound memory use.
        progressQ.send_message(_("Checking image checksum"))
        sha256 = hashlib.sha256()
        with open(self.image_path, "rb") as f:
            while True:
                data = f.read(1024*1024)
                if not data:
                    break
                sha256.update(data)
        filesum = sha256.hexdigest()
        log.debug("sha256 of %s is %s", self.data.method.url, filesum)
        if lowerASCII(self.data.method.checksum) != filesum:
            log.error("%s does not match checksum.",
                      self.data.method.checksum)
            exn = PayloadInstallError("Checksum of image does not match")
            if errorHandler.cb(exn) == ERROR_RAISE:
                raise exn
    # Mount the image and check to see if it is a LiveOS/*.img
    # style squashfs image. If so, move it to IMAGE_DIR and mount the real
    # root image on INSTALL_TREE
    blivet.util.mount(self.image_path, INSTALL_TREE, fstype="auto",
                      options="ro")
    if os.path.exists(INSTALL_TREE+"/LiveOS"):
        # Find the first .img in the directory and mount that on INSTALL_TREE
        img_files = glob.glob(INSTALL_TREE+"/LiveOS/*.img")
        if img_files:
            img_file = os.path.basename(sorted(img_files)[0])
            # move the mount to IMAGE_DIR
            os.makedirs(IMAGE_DIR, 0755)
            # work around inability to move shared filesystems
            iutil.execWithRedirect("mount", ["--make-rprivate", "/"])
            iutil.execWithRedirect("mount",
                                   ["--move", INSTALL_TREE, IMAGE_DIR])
            blivet.util.mount(IMAGE_DIR+"/LiveOS/"+img_file, INSTALL_TREE,
                              fstype="auto", options="ro")
        # NOTE(review): this chunk begins mid-__init__ -- the ``if`` matching
        # the ``else:`` below is outside this view.
        self.g = URLGrabber(proxies={'http': config['proxy']})
    else:
        writeInfo("URLGrabbersansProxy")
        self.g = URLGrabber()

    def getWebFile(self, url, dest):
        # Fetch *url* into *dest*: plain urllib when urlgrabber is missing,
        # otherwise the configured URLGrabber instance.
        if not self.gotLibUrlGrabber:
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            # NOTE(review): this hard-coded example download looks like
            # leftover debug code, and ``urllib`` is only imported in the
            # branch above, so this line would raise NameError -- confirm
            # and remove.
            urllib.urlretrieve("http://www.example.com/songs/mp3.mp3",
                               "mp3.mp3")
            self.g.urlgrab(url, filename=dest)

if __name__ == '__main__':
    # Ad-hoc smoke test: a direct grab through a proxy, then the same
    # download via two WebGrabber instances.
    g = URLGrabber(proxies={'http': 'http://proxy.free.fr:3128'})
    url = 'http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip'
    g.urlgrab(url, filename='moncul.zip')
    g1 = WebGrabber(config={'proxy': 'http://proxy.free.fr:3128'})
    g2 = WebGrabber()
    print "g1 is g2 %s" % (g1 is g2)
    g1.getWebFile('http://www.advanscene.com/offline/datas/ADVANsCEne_NDS.zip',
                  'moncul.zip')
    print "Done."