def site_a(site):
    if site[0:7] != 'http://':
        site = 'http://' + site
    opener = FancyURLopener()  # create the page 'grabber'
    page = opener.open(site)  # a test URL
    html = page.read()  # connects to the server and grabs the returned HTML
    # print html  # uncomment to see the raw HTML
    soup = BeautifulSoup(html, "lxml")  # strip the HTML tags to keep only the content
    for script in soup(["script", "style"]):
        script.extract()  # remove the JavaScript and CSS code
    conteudo = soup.get_text()
    limpa = ['com', 'br', 'www', 'http']
    site = re.sub(r'[^\w]', " ", site).split()
    novo_site = ''
    for a in site:
        if a not in limpa:
            novo_site += a
    site = novo_site
    file = open('site_w/' + site + '.txt', 'w')
    file.write(conteudo.encode('utf-8').lower())  # write the clean text (no HTML tags, JavaScript or CSS)
    lista_temas = {
        'esporte': ('futebol', 'bola', 'jogador', 'esporte', 'flamengo', 'vasco', 'botafogo', 'fluminense', 'sport'),
        'engenharia': ('engenharia', 'engenharias', 'engineer'),
        'jogos': ('jogo', 'jogos', 'game', 'games')
    }
    tema(lista_temas, site)
def unicode_urlopen(url, accept_lang=None):
    """Returns a *Unicode* file-like object for non-local documents.
    Client must ensure that the URL points to non-binary data. Pass in
    an Accept-Language value to configure the FancyURLopener we use."""
    opener = FancyURLopener()
    if accept_lang:
        opener.addheader("Accept-Language", accept_lang)

    # We want to convert the bytes file-like object returned by
    # urllib, which is bytes in both Python 2 and Python 3
    # fortunately, and turn it into a Unicode file-like object
    # with a little help from our StringIO friend.
    page = opener.open(url)
    encoding = page.headers['content-type']
    encoding = encoding.split('charset=')
    if len(encoding) > 1:
        encoding = encoding[-1]
        page = page.read().decode(encoding)
    else:
        page = page.read()
        encoding = meta_encoding(page) or 'utf8'
        page = page.decode(encoding)
    page = StringIO(page)
    return page
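A short usage sketch for unicode_urlopen; the URL is a placeholder, and meta_encoding() is assumed to be the module's own helper that sniffs a charset out of the markup:

# Usage sketch (placeholder URL):
page = unicode_urlopen("http://example.com/", accept_lang="en-US")
text = page.read()                # a Unicode string, already decoded
print text[:200].encode("utf-8")  # Python 2 print, matching the code above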
def ensureFileLocal(self, inFilePathOrURL):
    '''
    Takes a file path or URL. Sets self.localFilePath to the same path
    if the file is local, or if the file is remote but uncompressed.
    If a file is remote and compressed, retrieves the file into a local
    tmp file and returns that file name. In this case the flag
    self.deleteTempFile is set to True.

    :param inFilePathOrURL: file path or URL to file
    :type inFilePathOrURL: String
    '''
    self.localFilePath = inFilePathOrURL
    self.deleteTempFile = False

    if self.compression == COMPRESSION_TYPE.NO_COMPRESSION:
        return

    # Got compressed file; is it local?
    parseResult = urlparse(inFilePathOrURL)
    if parseResult.scheme == 'file':
        self.localFilePath = parseResult.path
        return

    opener = FancyURLopener()
    # Throws IOError if URL does not exist:
    self.localFilePath = opener.retrieve(inFilePathOrURL)[0]
    self.deleteTempFile = True
def utOpen(file):
    # Open file
    if 'http' in file:
        opener = FancyURLopener()
        f = opener.open(file)
    else:
        f = open(file, 'rb+')
    return f
def download(self, download_dir):
    dir_util.mkpath(download_dir)
    url = self.installer_url()
    print 'Downloading:', url
    web = FancyURLopener()
    web.retrieve(url, path.join(download_dir, path.basename(url)), display_progress)
def _get_sector_url(self, sector, length):
    start = sector * 2048
    if self._buff:
        self._buff.close()
    opener = FancyURLopener()
    # No-op the 206 handler so the opener does not treat the
    # Partial Content reply to our Range request as an error.
    opener.http_error_206 = lambda *a, **k: None
    opener.addheader("Range", "bytes=%d-%d" % (start, start + length - 1))
    self._buff = opener.open(self._url)
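For context, the same trick in a standalone sketch: overriding http_error_206 with a no-op stops FancyURLopener from raising on the 206 Partial Content reply, so the ranged body can be read directly (the function name is a placeholder):

def read_byte_range(url, start, length):
    # Sketch only: fetch bytes [start, start + length - 1] of a remote resource.
    opener = FancyURLopener()
    opener.http_error_206 = lambda *a, **k: None  # accept 206 Partial Content
    opener.addheader("Range", "bytes=%d-%d" % (start, start + length - 1))
    return opener.open(url).read()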
def fetch_genres(self):
    """ Grabs genres and returns tuple of genres """
    self.genre_url = 'http://www.shoutcast.com/sbin/newxml.phtml'
    self.urlhandler = FancyURLopener()
    self.fd = self.urlhandler.open(self.genre_url)
    self.genre = self.fd.read()
    self.fd.close()
    return self.genre
def fetch_stations(self):
    """ Grabs the xml list of stations from the shoutcast server """
    self.shout_url = 'http://www.shoutcast.com/sbin/newxml.phtml?genre=' + self.genre
    self.urlhandler = FancyURLopener()
    self.fd = self.urlhandler.open(self.shout_url)
    self.stations = self.fd.read()
    self.fd.close()
    return self.stations
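Both fetchers return raw XML; a hedged parsing sketch, assuming the legacy SHOUTcast format of a genrelist root element with genre children carrying a name attribute:

import xml.etree.ElementTree as ET

def parse_genres(xml_text):
    # Sketch only: pull genre names out of the assumed <genrelist> document.
    root = ET.fromstring(xml_text)
    return tuple(node.get('name') for node in root.findall('genre'))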
def download(self, download_dir):
    result = path.join(download_dir, self.package_basename)
    if path.exists(result):
        print 'Found install', self.package_basename
    else:
        dir_util.mkpath(download_dir)
        url = "http://www.eiffel-loop.com/download/" + self.package_basename
        print 'Downloading:', url
        web = FancyURLopener()
        web.retrieve(url, result, display_progress)
    return result
def _download_build(self):
    build_url = URL_PREFIX
    if build_number.startswith(('4', '5', '6')):
        build_url += "/ob"
    else:
        build_url += "/sb"
    build_url += "/build/%s" % build_number
    logger.info("Build url is %s" % build_url)
    resource = json.loads(urllib2.urlopen(build_url).read())
    deliverable_url = URL_PREFIX + "/%s" % resource[DELIVERABLE_URL_ATTR]
    infos = json.loads(urllib2.urlopen(deliverable_url).read())
    for info in infos[LIST_ATTR]:
        if info[DOWNLOAD_URL_ATTR].find("VMware-viewagent-x86_64") > 0:
            FancyURLopener(proxies={}).retrieve(info[DOWNLOAD_URL_ATTR], INSTALL_FILE)
            logger.info('Downloaded %s to %s successfully' % (info[DOWNLOAD_URL_ATTR], INSTALL_FILE))
def download_package(pkg_name, pkg_version):
    file_name, path, hash_algorithm, expected_digest = get_package_info(pkg_name, pkg_version)
    if not file_name:
        return False
    if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm, expected_digest):
        print('File with matching digest already exists, skipping {0}'.format(file_name))
        return True

    downloader = FancyURLopener()
    pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
    print('Downloading {0} from {1}'.format(file_name, pkg_url))
    downloader.retrieve(pkg_url, file_name)

    if check_digest(file_name, hash_algorithm, expected_digest):
        return True
    else:
        print('Hash digest check failed in file {0}.'.format(file_name))
        return False
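check_digest is not defined in this snippet; a plausible sketch, assuming hash_algorithm is a hashlib algorithm name (e.g. 'md5' or 'sha256') and expected_digest a hex string:

import hashlib

def check_digest(file_name, hash_algorithm, expected_digest):
    # Hypothetical helper: hash the file in chunks and compare hex digests.
    digest = hashlib.new(hash_algorithm)
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_digest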
def retrieveCatalog():
    try:
        cache = SimpleCache()
        catalog = cache.get(ADDON_NAME + '.catalog')
        if catalog:
            log("using cached catalog")
        if not catalog:
            log("downloading catalog")
            opener = FancyURLopener()
            f = opener.open(url)
            catalog = json.load(f)
            cache.set(ADDON_NAME + '.catalog', catalog, expiration=datetime.timedelta(hours=12))
        return catalog
    except Exception as e:
        log("error retrieving catalog - " + str(e), xbmc.LOGERROR)
        xbmcgui.Dialog().notification(ADDON_NAME, LANGUAGE(30003), ICON, 4000)
        xbmc.executebuiltin('Action(PreviousMenu)')
        sys.exit(0)
def __install_grinder(self, grinder_path):
    """
    Installs Grinder.
    Grinder version and download link may be set in config:
    "download-link": "http://domain/resource-{version}.zip"
    "version": "1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(grinder_path)))
    if not dest:
        dest = os.path.expanduser("~/grinder-taurus")
    dest = os.path.abspath(dest)
    grinder_full_path = os.path.join(dest, "lib", "grinder.jar")
    try:
        self.__grinder(grinder_full_path)
        return grinder_full_path
    except CalledProcessError:
        self.log.info("Will try to install grinder into %s", dest)

    downloader = FancyURLopener()
    grinder_zip_path = self.engine.create_artifact("grinder-dist", ".zip")
    version = self.settings.get("version", GrinderExecutor.VERSION)
    download_link = self.settings.get("download-link", GrinderExecutor.DOWNLOAD_LINK)
    download_link = download_link.format(version=version)
    self.log.info("Downloading %s", download_link)

    try:
        downloader.retrieve(download_link, grinder_zip_path, download_progress_hook)
    except BaseException as e:
        self.log.error("Error while downloading %s", download_link)
        raise e

    self.log.info("Unzipping %s", grinder_zip_path)
    unzip(grinder_zip_path, dest, 'grinder-' + version)
    os.remove(grinder_zip_path)
    self.log.info("Installed grinder successfully")
    return grinder_full_path
def __install_gatling(self, gatling_path):
    """
    Installs Gatling.
    Gatling version and download link may be set in config:
    "download-link": "http://domain/resource-{version}.zip"
    "version": "1.2.3"
    """
    dest = os.path.dirname(os.path.dirname(os.path.expanduser(gatling_path)))  # ../..
    dest = os.path.abspath(dest)
    try:
        self.__gatling(gatling_path)
        return gatling_path
    except OSError:
        self.log.info("Will try to install Gatling into %s", dest)

    # download gatling
    downloader = FancyURLopener()
    gatling_zip_path = self.engine.create_artifact("gatling-dist", ".zip")
    version = self.settings.get("version", GatlingExecutor.VERSION)
    download_link = self.settings.get("download-link", GatlingExecutor.DOWNLOAD_LINK)
    download_link = download_link.format(version=version)
    self.log.info("Downloading %s", download_link)

    # TODO: check archive checksum/hash before unzip and run
    try:
        downloader.retrieve(download_link, gatling_zip_path, download_progress_hook)
    except BaseException as e:
        self.log.error("Error while downloading %s", download_link)
        raise e

    self.log.info("Unzipping %s", gatling_zip_path)
    unzip(gatling_zip_path, dest, 'gatling-charts-highcharts-bundle-' + version)
    os.remove(gatling_zip_path)
    os.chmod(os.path.expanduser(gatling_path), 0o755)
    self.log.info("Installed Gatling successfully")
def fetchURL(url, file='', params=None, headers={}, isBinary=False, encodeURL=True):
    log("> bbbLib.fetchURL() %s isBinary=%s encodeURL=%s" % (url, isBinary, encodeURL))
    if encodeURL:
        safe_url = quote_plus(url, '/:&?=+#@')
    else:
        safe_url = url

    success = False
    data = None
    if not file:
        # create temp file
        file = xbmc.translatePath("special://temp/temp.html")
    # remove destination file if it already exists
    deleteFile(file)

    # fetch from url
    try:
        opener = FancyURLopener()
        # add headers if supplied
        if 'User-Agent' not in headers and 'User-agent' not in headers:
            headers['User-Agent'] = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        for name, value in headers.items():
            opener.addheader(name, value)

        fn, resp = opener.retrieve(safe_url, file, data=params)
        # print fn, resp
        content_type = resp.get("Content-Type", '').lower()
        # fail if expecting an image but a text content type was returned
        if isBinary and content_type.find("text") != -1:
            raise IOError("Not Binary")

        opener.close()
        del opener
        urlcleanup()
    except IOError, errobj:
        ErrorCode(errobj)
def _getlinesfromurl(self, url):
    err = 0
    strerr = ''
    # Retry URL download a few times.
    for count in range(self.retries):
        if count != 0:
            time.sleep(self.retrysecs)
        try:
            opener = FancyURLopener()
            f = opener.open(url, data='user_name=%s&password=%s&login=Login' % (self.username, self.password))
            rc = 0
            if 'www-authenticate' in f.headers:
                rc = 1
                strerr = 'Authentication is required to access %s' % url
            break
        except IOError, (_err, _strerr):
            rc = 1
            print url
            print _strerr
            (err, strerr) = (_err, _strerr)
def download(self):
    bean = self.bean
    update = self.update
    if not bean or not bean.path:
        return None

    opener = FancyURLopener()
    remote = opener.open(bean.path)
    remote_size = 0
    if "Content-Length" in remote.headers:
        remote_size = int(remote.headers["Content-Length"])
        bean.size = size2text(remote_size)

    block_size = 4096
    block_count = 0

    ext = get_file_extension(bean.path)

    path = FC().online_save_to_folder
    if not os.path.isdir(path):
        os.makedirs(path)

    if bean.save_to:
        to_file = os.path.join(bean.save_to, bean.text + ".mp3")
    else:
        to_file = get_bean_download_path(bean, FC().online_save_to_folder)

    if not os.path.exists(os.path.dirname(to_file)):
        os.makedirs(os.path.dirname(to_file))

    to_file_tmp = to_file + ".tmp"

    if os.path.exists(to_file_tmp):
        bean.status = DOWNLOAD_STATUS_INACTIVE
        bean.to_file = to_file
        update(bean)
        return None

    if os.path.exists(to_file):
        bean.status = DOWNLOAD_STATUS_COMPLETED
        bean.to_file = to_file
        update(bean)
        return None

    bean.save_to = to_file

    with file(to_file_tmp, "wb") as tmp_file:
        data = True
        # begin download
        self.bean.status = DOWNLOAD_STATUS_DOWNLOADING
        self.bean.path = to_file
        self.update(self.bean)

        while data:
            data = remote.read(block_size)
            if data:
                block_count += 1
                tmp_file.write(data)
                #time.sleep(0.1)
                persent = block_count * block_size * 100.0 / remote_size
                if block_count % 50 == 0:
                    bean.persent = persent
                    update(bean)
                    time.sleep(0.5)

    # update file info on finish
    logging.debug("rename %s - %s" % (to_file_tmp, to_file))
    os.rename(to_file_tmp, to_file)
    bean.status = DOWNLOAD_STATUS_COMPLETED
    bean.to_file = to_file
    bean.persent = 100
    update(bean)
def urlopen_custom(req, rawserver):
    global _urlopener

    if not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
        # remove User-Agent
        del _urlopener.addheaders[:]

    if not isinstance(req, str):
        #for header in r.headers:
        #    _urlopener.addheaders.append((header, r.headers[header]))
        #return _urlopener.open(r.get_full_url(), r.data)

        # All this has to be done manually, since httplib and urllib 1 and 2
        # add headers to the request that some routers do not accept.
        # A minimal, functional request includes the headers:
        #     Content-Length
        #     Soapaction
        # I have found the following to be specifically disallowed:
        #     User-agent
        #     Connection
        #     Accept-encoding
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(req.get_full_url())

        if not scheme.startswith("http"):
            raise ValueError("UPnP URL scheme is not http: " + req.get_full_url())

        if len(path) == 0:
            path = '/'

        if netloc.count(":") > 0:
            host, port = netloc.split(':', 1)
            try:
                port = int(port)
            except:
                raise ValueError("UPnP URL port is not int: " + req.get_full_url())
        else:
            host = netloc
            port = 80

        header_str = ''
        data = ''
        method = ''
        header_str = " " + path + " HTTP/1.0\r\n"
        if req.has_data():
            method = 'POST'
            header_str = method + header_str
            header_str += "Content-Length: " + str(len(req.data)) + "\r\n"
            data = req.data + "\r\n"
        else:
            method = 'GET'
            header_str = method + header_str

        header_str += "Host: " + host + ":" + str(port) + "\r\n"

        for header in req.headers:
            header_str += header + ": " + str(req.headers[header]) + "\r\n"

        header_str += "\r\n"
        data = header_str + data

        try:
            rawserver.add_pending_connection(host)
            s.connect((host, port))
        finally:
            rawserver.remove_pending_connection(host)

        s.send(data)
        r = HTTPResponse(s, method=method)
        r.begin()

        r.recv = r.read
        fp = socket._fileobject(r)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason

        return resp
    return _urlopener.open(req)
from rovin.belex.BelexParser import BelexParser
from urllib import FancyURLopener
from os import path

url = "http://www.ejustice.just.fgov.be/cgi_loi/loi_a1.pl?language=nl&table_name=wet&la=N&cn=1994021730&&caller=list&N&fromtab=wet"
filename = "constitution-nl.html"

if not path.isfile(filename):
    print "Downloading", url
    downloader = FancyURLopener()
    downloader.retrieve(url, filename)

f = open(filename, 'r')
html = f.read()

parser = BelexParser()
parser.feed(html)

for article in parser.all_articles():
    print " <<<< "
    print article.number
    print "BODY:" + article.body
    print " >>>> "
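One portability note on the import above: FancyURLopener lives in urllib only on Python 2; on Python 3 it moved to urllib.request (where it is deprecated). Scripts like this one are sometimes guarded with a dual import, a sketch of which is:

# Compatibility sketch for the FancyURLopener import:
try:
    from urllib import FancyURLopener          # Python 2
except ImportError:
    from urllib.request import FancyURLopener  # Python 3 (deprecated there)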
def get_poster(self, item):
    """Returns file path to the new poster"""
    from movie import Progress, Retriever

    file_to_copy = tempfile.mktemp(suffix=self.widgets['movie']['number'].get_text(),
                                   dir=self.locations['temp'])
    file_to_copy += ".jpg"
    canceled = False
    try:
        progress = Progress(self.widgets['window'], _("Fetching poster"), _("Wait a moment"))
        retriever = Retriever(item.LargeImage.URL, self.widgets['window'], progress, file_to_copy)
        retriever.start()
        while retriever.isAlive():
            progress.pulse()
            if progress.status:
                canceled = True
            while gtk.events_pending():
                gtk.main_iteration()
        progress.close()
        urlcleanup()
    except:
        canceled = True
        gutils.warning(_("Sorry. A connection error has occurred."))
        try:
            os.remove(file_to_copy)
        except:
            log.error("no permission for %s" % file_to_copy)

    if not canceled:
        if os.path.isfile(file_to_copy):
            im = None
            try:
                im = Image.open(file_to_copy)
            except IOError:
                log.warn("failed to identify %s" % file_to_copy)
            if im and im.size == (1, 1):
                url = FancyURLopener().open("http://www.amazon.com/gp/product/images/%s" % item.ASIN).read()
                if url.find('no-img-sm._V47056216_.gif') > 0:
                    log.warn('No image available')
                    gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                    return False
                url = gutils.after(url, 'id="imageViewerDiv"><img src="')
                url = gutils.before(url, '" id="prodImage"')
                urlretrieve(url, file_to_copy)
                try:
                    im = Image.open(file_to_copy)
                except IOError:
                    log.warn("failed to identify %s", file_to_copy)
            if not im:
                # something wrong with the image, give some feedback to the user
                log.warn('No image available')
                gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
                return False
            if im.mode != 'RGB':  # convert GIFs
                im = im.convert('RGB')
                im.save(file_to_copy, 'JPEG')
            # set to None because the file is locked otherwise (os.remove throws an exception)
            im = None

            handler = self.widgets['big_poster'].set_from_file(file_to_copy)
            self.widgets['poster_window'].show()
            self.widgets['poster_window'].move(0, 0)
            if gutils.question(_("Do you want to use this poster instead?"), self.widgets['window']):
                return file_to_copy
            else:
                log.info("Reverting to previous poster and deleting new one from disk.")
                try:
                    os.remove(file_to_copy)
                except:
                    log.error('cannot remove %s', file_to_copy)
                self.widgets['poster_window'].hide()
        else:
            gutils.warning(_("Sorry. This movie is listed but has no poster available at Amazon.com."))
    else:
        # cleanup temporary files after canceling the download
        if os.path.isfile(file_to_copy):
            try:
                os.remove(file_to_copy)
            except:
                log.error('cannot remove %s', file_to_copy)
def _fetch_sources(self, workspace, package_dir, package_dest_dir):
    def checkout(self, source_url, cache_dir, workspace_dir):
        if not os.path.exists(cache_dir):
            print 'No cache detected. Cloning a fresh cache'
            self.sh('%' + '{git} clone --mirror "%s" "%s"' % (source_url, cache_dir))
        else:
            print 'Updating existing cache'
            self.cd(cache_dir)
            self.sh('%{git} fetch --all --prune')

        if not os.path.exists(workspace_dir):
            print 'No workspace checkout detected. Cloning a fresh workspace checkout from the cache'
            self.sh('%' + '{git} clone --local --shared "%s" "%s"' % (cache_dir, workspace_dir))
            self.cd(workspace_dir)
        else:
            print 'Updating existing workspace checkout'
            self.cd(workspace_dir)
            self.sh('%{git} clean -xffd')
            self.sh('%{git} reset --hard')
            self.sh('%{git} fetch --all --prune')

        if self.revision != None:
            self.sh('%' + '{git} checkout %s' % self.revision)
        elif self.git_branch != None:
            self.sh('%' + '{git} checkout origin/%s' % self.git_branch)
        else:
            self.sh('%{git} checkout origin/master')

    def get_local_filename(source):
        return source if os.path.isfile(source) else os.path.join(
            package_dest_dir, os.path.basename(source))

    def get_cache_name(name):
        if self.organization is None:
            return self.name
        else:
            return self.organization + "+" + name

    if self.sources is None:
        return

    if not os.path.exists(package_dest_dir):
        os.mkdir(package_dest_dir)

    local_sources = []
    for source in self.sources:
        local_source = os.path.join(package_dir, source)
        local_source_file = os.path.basename(local_source)
        local_dest_file = get_local_filename(local_source)
        local_sources.append(local_dest_file)

        if os.path.isfile(local_source):
            if filecmp.cmp(local_source, local_dest_file):
                log(1, 'using cached source: %s' % local_dest_file)
            else:
                log(1, 'copying local source: %s' % local_source_file)
                shutil.copy2(local_source, local_dest_file)
        elif source.startswith(('http://', 'https://', 'ftp://')):
            if os.path.isfile(local_dest_file):
                try:
                    self.extract_archive(local_dest_file, True)
                    log(1, 'using cached source: %s' % local_dest_file)
                except:
                    log(1, 'local cache is corrupt for: %s' % local_dest_file)
                    os.remove(local_dest_file)
            if not os.path.isfile(local_dest_file):
                log(1, 'downloading remote source: %s' % source)
                filename, message = FancyURLopener().retrieve(source, local_dest_file)
        elif source.startswith(('git://', 'file://', 'ssh://')) or source.endswith('.git'):
            log(1, 'cloning or updating git repository: %s' % source)
            local_name = os.path.splitext(os.path.basename(source))[0]
            local_dest_file = os.path.join(package_dest_dir,
                                           '%s.gitmirror' % (get_cache_name(local_name)))
            local_sources.pop()
            local_sources.append(local_dest_file)
            working_dir = os.getcwd()
            try:
                checkout(self, source, local_dest_file, workspace)
            except Exception as e:
                if os.path.exists(local_dest_file):
                    print 'Deleting ' + local_dest_file + ' cache due to git error'
                    shutil.rmtree(local_dest_file, ignore_errors=True)
                if os.path.exists(workspace):
                    print 'Deleting ' + workspace + ' cache due to git error'
                    shutil.rmtree(workspace, ignore_errors=True)
                # Explicitly reset the working dir to a known directory which has not been deleted
                # 'git clone' does not work if you are in a directory which has been deleted
                os.chdir(working_dir)
                checkout(self, source, local_dest_file, workspace)
            finally:
                os.chdir(workspace)
        else:
            raise Exception('missing source: %s' % source)

    self.sources = local_sources
def main(argv=None):  # {{{
    # Separates the URL into a directory and the file or pattern based on the
    # last appearance of '/'.
    if len(sys.argv) > 1:
        pivot = sys.argv[1].rfind("/")
        url = sys.argv[1][:pivot]
        pivot += 1
        find = sys.argv[1][pivot:]
    else:
        print "Invalid input!"
        print ""
        print "Try: 'DownloadExternalPackage.py url [localFile]'"
        print ""
        print "Where 'url' is the URL with an explicit package name, or the URL followed by the"
        print "truncated package name, and 'localFile' is the file name (including extension)"
        print "that you would like to save as."
        print ""
        print "Examples:"
        print ""
        print "  DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/petsc-2.3.2-p3.tar.gz' 'petsc-2.3.2-p3.tar.gz'"
        print "    The old style and the safest way to download a package."
        print ""
        print "  DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool' 'libtool.tar.gz'"
        print "    The new style. For packages like 'Libtool', which we never expect to be using"
        print "    multiple versions of, this downloads the most recent version and saves it as"
        print "    the generic 'libtool.tar.gz'."
        print ""
        print "  DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/gsl-1.' 'gsl-1.15.tar.gz'"
        print "    The new style. Demonstrates how this script can disambiguate a package name"
        print "    if there is more than one package matching 'gsl-'."
        print ""
        print "  DownloadExternalPackage.py 'http://issm.jpl.nasa.gov/files/externalpackages/libtool'"
        print "    The new style. Downloads a package with 'libtool' as a prefix and saves it"
        print "    as its canonical name."
        return 1  # nothing to do without a URL

    if len(sys.argv) > 2:
        localFile = sys.argv[2]
        print "Downloaded file will be saved as: " + localFile
    else:
        localFile = None
        print "Downloaded file will be saved with the same file name."

    print "Looking for: " + find

    # As an extra precaution, if no extension is given for a particular package
    # such as '.../libtool', then ensure that files found are of appropriate
    # file extensions.
    #
    # WARNING: The external packages directory includes executable binaries with
    # '.exe' extensions. As such, '.exe' is an acceptable suffix, but this is
    # inherently dangerous since this script can be used to download from any
    # valid website. Furthermore, if an individual attempts a "man-in-the-middle"
    # attack, then the user would be capable of downloading executables from
    # an untrusted source.
    pattern = find + r"[\w.-]*(\.tar\.gz|tar\.gz2|tgz|zip|exe)?"
    parser = MyHTMLParser(pattern)

    # Creates a 'FancyURL' which allows the script to fail gracefully by catching
    # HTTP error codes 30X and several 40X (where 'X' is a natural number).
    urlObject = FancyURLopener()
    obj = urlObject.open(url)
    parser.feed(obj.read())

    # If a file pattern was used to describe the file that should be downloaded,
    # then there is the potential for multiple file matches. Currently, the script
    # will detect this ambiguity and print out all the matches, while informing
    # the user that they must refine their search.
    #
    # TODO: Prompt the user to select their preferred target from a list.
    if len(parser.targets) > 1:
        print "Could not resolve your download due to the number of hits."
        print "Refine your search."
        for i in parser.targets:
            print i
    elif len(parser.targets) == 1:
        print "Found: " + parser.targets[0]
        url += "/" + parser.targets[0]
        if localFile is None:
            if os.path.exists(parser.targets[0]):
                print "File " + parser.targets[0] + " already exists and will not be downloaded..."
            else:
                urllib.urlretrieve(url, parser.targets[0])
                print "File saved as: " + parser.targets[0]
        else:
            if os.path.exists(localFile):
                print "File " + localFile + " already exists and will not be downloaded..."
            else:
                if parser.targets[0] == localFile:
                    print "File found and destination match."
                elif parser.matcher.match(localFile) is not None:
                    print "File found matches destination pattern."
                else:
                    print "WARNING: the file found '" + parser.targets[0] + "' does not match '" + localFile + "'"
                    print "Ensure the downloaded version is suitable."
                urllib.urlretrieve(url, localFile)
                print "File saved as: " + localFile
    else:
        print "No matches found!"

    obj.close()