Beispiel #1
0
def test_basic_startup():
    """Start an HTTP server on a background thread and check both routes."""
    import thread
    # XXX: how to do this without threads?
    bound_addr = ('127.0.0.1', 21210)
    httpd = server.HTTPServer(bound_addr, Handler)
    thread.start_new_thread(httpd.serve_forever, ())
    base = "http://127.0.0.1:21210"
    for path in ("/index", "/"):
        assert URLopener().open(base + path).read() == "xxx"
Beispiel #2
0
    def read(self, uri=None, inventory=None):
        """Read sitemap from a URI, including handling sitemapindexes.

        Returns the inventory (a new Inventory unless one was supplied).

        Includes the subtlety that if the input URI is a local file and
        the sitemapindex lists remote sitemap URIs, those URIs are mapped
        to local files via self.mapper before being read.

        Raises Exception on fetch failure or when a sitemapindex is seen
        while self.allow_multifile is disabled; raises ValueError when
        the XML root is neither urlset nor sitemapindex.
        """
        if (inventory is None):
            inventory = Inventory()
        # Fetch the document; URLopener handles both URLs and local files.
        try:
            fh = URLopener().open(uri)
        except IOError as e:
            raise Exception(
                "Failed to load sitemap/sitemapindex from %s (%s)" %
                (uri, str(e)))
        etree = parse(fh)
        # check root element: urlset (for sitemap), sitemapindex or bad
        self.sitemaps_created = 0
        if (etree.getroot().tag == '{' + SITEMAP_NS + "}urlset"):
            # Plain sitemap: parse resources directly into the inventory.
            self.inventory_parse_xml(etree=etree, inventory=inventory)
            self.sitemaps_created += 1
        elif (etree.getroot().tag == '{' + SITEMAP_NS + "}sitemapindex"):
            if (not self.allow_multifile):
                raise Exception(
                    "Got sitemapindex from %s but support for sitemapindex disabled"
                    % (uri))
            sitemaps = self.sitemapindex_parse_xml(etree=etree)
            sitemapindex_is_file = self.is_file_uri(uri)
            # now loop over all entries to read each sitemap and add to inventory
            for sitemap_uri in sorted(sitemaps.resources.keys()):
                if (sitemapindex_is_file):
                    if (not self.is_file_uri(sitemap_uri)):
                        # Attempt to map URI to local file
                        remote_uri = sitemap_uri
                        sitemap_uri = self.mapper.src_to_dst(remote_uri)
                else:
                    # FIXME - need checks on sitemap_uri values:
                    # 1. should be in same server/path as sitemapindex URI
                    pass
                try:
                    fh = URLopener().open(sitemap_uri)
                except IOError as e:
                    raise Exception(
                        "Failed to load sitemap from %s listed in sitemap index %s (%s)"
                        % (sitemap_uri, uri, str(e)))
                self.inventory_parse_xml(fh=fh, inventory=inventory)
                self.sitemaps_created += 1
                #print "%s : now have %d resources" % (sitemap_uri,len(inventory.resources))
        else:
            raise ValueError("XML is not sitemap or sitemapindex")
        return (inventory)
    def handle_starttag(self, tag, attrs):
        """HTMLParser hook: download the APOD "jpg" image linked from an anchor.

        Only the first matching relative href is fetched per pass; the
        module-global bDoWork flag is cleared on the first hit to stop
        further downloads.
        """
        #tmpoutput = ""
        count = 0
        global bDoWork
        #self.output = ""
        # Only parse the 'anchor' tag.
        if tag == "a":
            # Check the list of defined attributes.
            for name, value in attrs:
                # If href is defined, print it.
                if name == "href":
                    # Only hrefs whose last three characters are "jpg".
                    if value[len(value) - 3:len(value)] == "jpg":
                        #print value
                        # Relative link only, and only while bDoWork is still set.
                        if not "http://" in value and bDoWork == True:
                            bDoWork = False
                            tmpoutput = value
                            #print "Val: " + value
                            imgurl = 'http://apod.nasa.gov/apod/' + tmpoutput
                            #print "IMGURL: " + imgurl
                            filename = imgurl.split('/')[-1]
                            #print "FileName: " + filename

                            # Skip files already on disk; only fetch from apod.nasa.gov.
                            if (not os.path.isfile(filename)) and (
                                    'apod.nasa.gov' in imgurl):
                                print "Downloading: " + filename
                                image = URLopener()
                                image.retrieve(imgurl, filename)
                                sleep(lWaitTime)
                            elif (os.path.isfile(filename)):
                                print "Verified: " + filename
                            break
Beispiel #4
0
 def do_method(self, method):
     """Dispatch an HTTP request according to the per-method config.

     Looks up self.config[method] for the longest configured path
     prefix matching the request path; when one matches, the configured
     value is sent via send_error.  Otherwise the request is proxied to
     "forward_to" when configured, falls back to a "*" entry, and
     finally answers 404.
     """
     method_conf = self.config[method]
     # Longest-prefix match over the configured paths.
     matchlen = 0
     match = None
     for path in method_conf:
         if self.is_path_prefix(path) and len(path) > matchlen:
             matchlen = len(path)
             match = path
     if matchlen > 0:
         self.send_error(method_conf[match])
     elif "forward_to" in self.config:
         # Proxy: fetch the upstream URL and relay its body to the client.
         url = urljoin(self.config['forward_to'], self.path)
         self.log_request()
         self.log_message("Forwarding to {}".format(url))
         o = URLopener().open(url)
         self.wfile.write(o.read())
         o.close()
     elif "*" in method_conf:
         self.send_error(method_conf['*'])
     else:
         print(method.upper(), self.path, self.config['port'])
         self.log_message(
             "No match for %s %s on port %d and no default configured" %
             (method.upper(), self.path, self.config['port']))
         self.send_error(404)
Beispiel #5
0
    def command(self):
        """Issue an authenticated HTTP GET/POST against the local server.

        args[0] is the verb, args[1] the URL (host/port filled in from
        session config when not absolute).  Remaining args are key=value
        pairs routed to the query ('Q') or POST ('P') set.  A session
        cookie is set up via mailpile.auth so the request arrives logged in.
        Returns a success result with headers and (JSON-decoded when
        possible) body, or an error result on any exception.
        """
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith('http'):
            url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                      self.session.config.sys.http_port,
                                      ('/' + url).replace('//', '/'))

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert (method in ('GET', 'POST'))

        # Split remaining args into query vars (qv) and post vars (pv);
        # a bare argument starting with 'P'/'Q' switches the destination
        # of subsequent key=value pairs.
        qv, pv = [], []
        if method == 'POST':
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if '=' in arg:
                which.append(tuple(arg.split('=', 1)))
            elif arg.upper()[0] == 'P':
                which = pv
            elif arg.upper()[0] == 'Q':
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ('?' in url and '&' or '?') + qv

        # Log us in automagically!
        httpd = self.session.config.http_worker.httpd
        global HACKS_SESSION_ID
        if HACKS_SESSION_ID is None:
            HACKS_SESSION_ID = httpd.make_session_id(None)
        mailpile.auth.SetLoggedIn(None,
                                  user='******',
                                  session_id=HACKS_SESSION_ID)
        cookie = httpd.session_cookie

        try:
            uo = URLopener()
            uo.addheader('Cookie', '%s=%s' % (cookie, HACKS_SESSION_ID))
            if method == 'POST':
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, 'rb').read().strip()
            # Decode JSON bodies for the caller's convenience.
            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
            return self._success('%s %s' % (method, url),
                                 result={
                                     'headers': hdrs.splitlines(),
                                     'data': data
                                 })
        except:
            self._ignore_exception()
            return self._error('%s %s' % (method, url))
Beispiel #6
0
def download_package(pkg_name, pkg_version):
    '''Download the required package. Sometimes the download can be flaky, so we use the
  retry decorator.'''
    pkg_type = 'sdist'  # Don't download wheel archives for now
    # This JSON endpoint is not provided by PyPI mirrors so we always need to get this
    # from pypi.python.org.
    pkg_info = json.loads(
        urlopen('https://pypi.python.org/pypi/%s/json' % pkg_name).read())

    downloader = URLopener()
    for pkg in pkg_info['releases'][pkg_version]:
        if pkg['packagetype'] == pkg_type:
            filename = pkg['filename']
            expected_md5 = pkg['md5_digest']
            if os.path.isfile(filename) and check_md5sum(
                    filename, expected_md5):
                print "File with matching md5sum already exists, skipping %s" % filename
                return True
            pkg_url = "{0}/packages/{1}".format(PYPI_MIRROR, pkg['path'])
            print "Downloading %s from %s" % (filename, pkg_url)
            downloader.retrieve(pkg_url, filename)
            actual_md5 = md5(open(filename).read()).hexdigest()
            if check_md5sum(filename, expected_md5):
                return True
            else:
                print "MD5 mismatch in file %s." % filename
                return False
    print "Could not find archive to download for %s %s %s" % (
        pkg_name, pkg_version, pkg_type)
    sys.exit(1)
Beispiel #7
0
    def test_05_03_http_image_zipfile(self):
        """Configure CreateWebPage to build a zipfile from web-hosted images.

        The example-image URLs are probed first; if any is unreachable the
        test is recorded as an expected failure instead of an error.
        """
        # Make a zipfile using files accessed from the web
        def alter_fn(module):
            self.assertTrue(isinstance(module, C.CreateWebPage))
            module.wants_zip_file.value = True
            module.zipfile_name.value = ZIPFILE_NAME
            module.directory_choice.dir_choice = C.ABSOLUTE_FOLDER_NAME
            module.directory_choice.custom_path = cpprefs.get_default_image_directory()

        url_root = "http://cellprofiler.org/svnmirror/ExampleImages/ExampleSBSImages/"
        url_query = "?r=11710"
        # Each entry is (root, filename+query); "".join() below recombines them.
        filenames = [(url_root, fn + url_query) for fn in
                     ("Channel1-01-A-01.tif", "Channel2-01-A-01.tif",
                      "Channel1-02-A-02.tif", "Channel2-02-A-02.tif")]
        #
        # Make sure URLs are accessible
        #
        try:
            for filename in filenames:
                URLopener().open("".join(filename)).close()
        except IOError, e:
            def bad_url(e=e):
                raise e

            unittest.expectedFailure(bad_url)()
Beispiel #8
0
def unshortenurl(short):
    """Attempt to open a shortened URL.

    NOTE(review): nothing is ever returned -- the IOError is caught and
    bound to a local that is then discarded, so this function always
    returns None.  Presumably the caller was meant to receive the
    resolved/redirect information; confirm intended behavior.
    """
    from urllib import URLopener
    opener = URLopener()
    try:
        opener.open(short)
    except IOError, e:
        f = e
def getRetriever(scheme):
    """
    Get the right retriever function depending on the scheme.
    If scheme is 'http' return urllib.urlretrieve, else if the scheme is https create a URLOpener
    with certificates taken from the X509_USER_PROXY variable. If certificates are not available return
    urllib.urlretrieve as for the http case.
    """
    # Locate the user proxy certificate, if any.
    proxy_path = os.environ.get('X509_USER_PROXY')
    if proxy_path is not None and os.path.isfile(proxy_path):
        certfile = proxy_path
    else:
        if scheme == 'https':
            print(
                "User proxy not found. Trying to retrieve the file without using certificates"
            )
        certfile = None

    # Certificate-backed opener only for non-http schemes with a cert available.
    if certfile and scheme != 'http':
        print("Using %s as X509 certificate" % certfile)
        opener = URLopener(None, key_file=certfile, cert_file=certfile)
        opener.addheader('Accept', 'application/octet-stream')
        return opener.retrieve
    return urllib.urlretrieve
Beispiel #10
0
def open_http(url, data=None):
    """Use HTTP protocol.

    *url* is either a "host/selector" string or a (host, selector)
    tuple; the tuple form is the proxy case, where host may carry
    "user:pass@" authorization info.  Returns a FakeHTTPConnection with
    the request line and headers already sent.  POST is used when *data*
    is given, otherwise GET.  Raises IOError when no host can be found.
    """
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = urllib.unquote(host)
        realhost = host
    else:
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            if urllib.proxy_bypass(realhost):
                # Bypass the proxy and talk to the real host directly.
                host = realhost

        #print "proxy via http:", host, selector
    if not host: raise IOError('http error', 'no host given')

    # Credentials are sent as HTTP Basic auth (base64 is encoding, not
    # encryption).
    if proxy_passwd:
        import base64
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None

    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth: c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth: c.putheader('Authorization', 'Basic %s' % auth)
    if realhost: c.putheader('Host', realhost)
    # Copy the default headers a stock URLopener would send.
    for args in URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
Beispiel #11
0
def test_static_directory():
    """Serve a directory of static files and fetch each one over HTTP.

    Currently skipped ("Fails").
    """
    py.test.skip("Fails")
    import thread
    tmpdir = py.test.ensuretemp("server_static_dir")
    tmpdir.ensure("a", dir=1)
    tmpdir.join("a").ensure("a.txt").write("aaa")
    tmpdir.join("a").ensure("b.txt").write("bbb")

    class StaticDir(server.Handler):
        static_dir = tmpdir
        a_dir = server.StaticDir(tmpdir.join("a"))

    # Port 0 lets the OS pick a free port; read it back before connecting.
    httpd = server.HTTPServer(('127.0.0.1', 0), StaticDir)
    port = httpd.server_port
    thread.start_new_thread(httpd.serve_forever, ())
    addr = "http://127.0.0.1:%d/" % port
    assert URLopener().open(addr + "a_dir/a.txt").read() == "aaa"
    assert URLopener().open(addr + "a_dir/b.txt").read() == "bbb"
    def read(self, uri=None, resources=None, index_only=False):
        """Read sitemap from a URI including handling sitemapindexes

        If index_only is True then individual sitemaps references in a sitemapindex
        will not be read. This will result in no resources being returned and is
        useful only to read the metadata and links listed in the sitemapindex.

        Includes the subtlety that if the input URI is a local file and is a 
        sitemapindex which contains URIs for the individual sitemaps, then these
        are mapped to the filesystem also.

        NOTE(review): the ``resources`` parameter is accepted but never
        used in this method -- confirm whether it should seed parsing.
        """
        try:
            fh = URLopener().open(uri)
            self.num_files += 1
        except IOError as e:
            raise IOError("Failed to load sitemap/sitemapindex from %s (%s)" %
                          (uri, str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
            self.logger.debug("Read %d bytes from %s" %
                              (self.content_length, uri))
        except KeyError:
            # If we don't get a length then c'est la vie
            self.logger.debug("Read ????? bytes from %s" % (uri))
            pass
        self.logger.info("Read sitemap/sitemapindex from %s" % (uri))
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self, capability=self.capability_name)
        # what did we read? sitemap or sitemapindex?
        if (s.parsed_index):
            # sitemapindex
            if (not self.allow_multifile):
                raise ListBaseIndexError(
                    "Got sitemapindex from %s but support for sitemapindex disabled"
                    % (uri))
            self.logger.info("Parsed as sitemapindex, %d sitemaps" %
                             (len(self.resources)))
            sitemapindex_is_file = self.is_file_uri(uri)
            if (index_only):
                # don't read the component sitemaps
                self.sitemapindex = True
                return
            # now loop over all entries to read each sitemap and add to resources
            # (the index entries parsed into self.resources are swapped out for
            # a fresh container that accumulates the component resources)
            sitemaps = self.resources
            self.resources = self.resources_class()
            self.logger.info("Now reading %d sitemaps" % len(sitemaps.uris()))
            for sitemap_uri in sorted(sitemaps.uris()):
                self.read_component_sitemap(uri, sitemap_uri, s,
                                            sitemapindex_is_file)
        else:
            # sitemap
            self.logger.info("Parsed as sitemap, %d resources" %
                             (len(self.resources)))
Beispiel #13
0
    def get(self, url, inventory=None):
        """Fetch an inventory from *url*.

        Creates a fresh Inventory when none is supplied, otherwise adds
        the parsed entries to the one passed in; returns the inventory.
        """
        if inventory is None:
            inventory = Inventory()
        fh = URLopener().open(url)
        Sitemap().inventory_parse_xml(fh=fh, inventory=inventory)
        return inventory
def download_if_not_exist(url, target_file):
    """Download url to target_file unless it already exists.

    Downloads into a '.part' temp file first and renames on completion,
    so a partially-written file is never left at target_file.  Returns
    target_file either way.

    NOTE(review): `makedirs` here takes `exists_ok` (the stdlib spells
    it `exist_ok`), so this is presumably a project helper -- confirm.
    """
    if not os.path.isfile(target_file):
        get_logger().info('downloading %s to %s', url, target_file)

        makedirs(os.path.dirname(target_file), exists_ok=True)

        # Remove any stale partial download before starting over.
        temp_filename = target_file + '.part'
        if os.path.isfile(temp_filename):
            os.remove(temp_filename)
        URLopener().retrieve(url, temp_filename)
        os.rename(temp_filename, target_file)
    return target_file
Beispiel #15
0
def test_static_page():
    """Serve a single static HTML file and fetch it over HTTP."""
    import thread
    tmpdir = py.test.ensuretemp("server_static_page")
    tmpdir.ensure("test.html").write("<html></html>")

    class StaticHandler(server.TestHandler):
        static_dir = str(tmpdir)
        index = server.Static(os.path.join(static_dir, "test.html"))

    httpd = server.HTTPServer(('127.0.0.1', 21212), StaticHandler)
    # Serve in the background so the request below can be made from this thread.
    thread.start_new_thread(httpd.serve_forever, ())
    assert URLopener().open("http://127.0.0.1:21212/index").read() == \
           "<html></html>"
Beispiel #16
0
 def __init__(self, server, infoFile):
     """
     takes a server location and an info file as parameters in the constructor
     it will use this server to fetch the new information
     there should be a json/version and json/info.json dir on this server
     """
     self._infoFile = infoFile
     self._serverJSON = server + self._infoFile
     self._serverDate = server + "json/version"
     # Pick an HTTP client per interpreter: URLopener on py2, the imported
     # `request` module otherwise (presumably urllib.request -- confirm).
     # NOTE(review): string comparison sys.version < '3' is fragile.
     if sys.version < '3':
         self.br = URLopener()
     else:
         self.br = request
Beispiel #17
0
	def __init__(self,manga_name,chapter,end_chapter,manga_location,dl_manager):
		"""Set up per-download state for fetching a manga chapter range.

		current_image starts at "000" (a zero-padded string counter) and
		br is the URLopener used for subsequent requests.
		"""
		self.manga_location = manga_location
		self.manga_name	    = manga_name
		self.chapter		= chapter
		self.end_chapter	= end_chapter
		self.current_image  = "000"
		self.img			= ""
		self.imgs		    = []
		self.chapters	    = []
		self.br             = URLopener()
		self.response       = ""
		self.response_lines = ""
		self.dl_manager     = dl_manager
Beispiel #18
0
def test_static_page_implicit():
    """Serve index.html implicitly (no explicit path) and fetch it over HTTP."""
    import thread
    tmpdir = py.test.ensuretemp("server_static_page_implicit")
    tmpdir.ensure("index.html").write("<html></html>")

    class StaticHandler(server.TestHandler):
        static_dir = str(tmpdir)
        index = server.Static()

    server.patch_handler(StaticHandler)
    httpd = server.HTTPServer(('127.0.0.1', 21213), StaticHandler)
    # Serve in the background so the request below can be made from this thread.
    thread.start_new_thread(httpd.serve_forever, ())
    assert URLopener().open("http://127.0.0.1:21213/index").read() == \
           "<html></html>"
Beispiel #19
0
    def command(self):
        """Issue an HTTP GET/POST built from the command arguments.

        args[0] is the verb, args[1] the URL (host/port filled in from
        session config when not absolute).  Remaining args are key=value
        pairs routed to the query ('Q') or POST ('P') set.  Returns a
        success result with headers and (JSON-decoded when possible)
        body, or an error result on any exception.
        """
        args = list(self.args)
        method, url = args[0:2]

        if not url.startswith('http'):
            url = 'http://%s:%s%s' % (self.session.config.sys.http_host,
                                      self.session.config.sys.http_port,
                                      ('/' + url).replace('//', '/'))

        # FIXME: The python URLopener doesn't seem to support other verbs,
        #        which is really quite lame.
        method = method.upper()
        assert (method in ('GET', 'POST'))

        # Split remaining args into query vars (qv) and post vars (pv);
        # a bare argument starting with 'P'/'Q' switches the destination
        # of subsequent key=value pairs.
        qv, pv = [], []
        if method == 'POST':
            which = pv
        else:
            which = qv
        for arg in args[2:]:
            if '=' in arg:
                which.append(tuple(arg.split('=', 1)))
            elif arg.upper()[0] == 'P':
                which = pv
            elif arg.upper()[0] == 'Q':
                which = qv

        if qv:
            qv = urlencode(qv)
            url += ('?' in url and '&' or '?') + qv

        try:
            uo = URLopener()
            if method == 'POST':
                (fn, hdrs) = uo.retrieve(url, data=urlencode(pv))
            else:
                (fn, hdrs) = uo.retrieve(url)
            hdrs = unicode(hdrs)
            data = open(fn, 'rb').read().strip()
            # Decode JSON bodies for the caller's convenience.
            if data.startswith('{') and 'application/json' in hdrs:
                data = json.loads(data)
            return self._success('%s %s' % (method, url),
                                 result={
                                     'headers': hdrs.splitlines(),
                                     'data': data
                                 })
        except:
            self._ignore_exception()
            return self._error('%s %s' % (method, url))
    def download__grobid_service_zip_if_not_exist(self):
        """Download the grobid service zip once, atomically via a '.part' file.

        No-op when the target file already exists.

        NOTE(review): `makedirs` here takes `exists_ok` (the stdlib spells
        it `exist_ok`), so this is presumably a project helper -- confirm.
        """
        if not os.path.isfile(self.grobid_service_zip_filename):
            get_logger().info(
                'downloading %s to %s',
                self.grobid_service_zip_url,
                self.grobid_service_zip_filename
            )

            makedirs(os.path.dirname(self.grobid_service_zip_filename), exists_ok=True)

            # Drop any stale partial download, fetch fresh, then rename into place.
            temp_zip_filename = self.grobid_service_zip_filename + '.part'
            if os.path.isfile(temp_zip_filename):
                os.remove(temp_zip_filename)
            URLopener().retrieve(self.grobid_service_zip_url, temp_zip_filename)
            os.rename(temp_zip_filename, self.grobid_service_zip_filename)
Beispiel #21
0
def try_download(_path, _file, _url, _stale,):
    """Download _url to _path+_file unless a fresh copy already exists.

    A local copy counts as fresh while its mtime plus _stale is still in
    the future.  Returns a human-readable status string.
    """
    now = time()
    opener = URLopener()
    target = _path + _file
    # Fetch when the file is missing, or present but older than the
    # staleness window.
    needs_fetch = True
    if isfile(target):
        needs_fetch = (getmtime(target) + _stale) < now
    if needs_fetch:
        try:
            opener.retrieve(_url, target)
            result = 'ID ALIAS MAPPER: \'{}\' successfully downloaded'.format(_file)
        except IOError:
            result = 'ID ALIAS MAPPER: \'{}\' could not be downloaded'.format(_file)
    else:
        result = 'ID ALIAS MAPPER: \'{}\' is current, not downloaded'.format(_file)
    opener.close()
    return result
Beispiel #22
0
    def download(self, sysctl, code):
        """Fetch every file in self.files from self.url into self.p_dwld.

        Returns True on success; on any failure logs the error, runs
        self._rolback(sysctl, code) and falls through (returning None).
        """
        try:
            logging.info('Begin download files.')

            # Make sure the download directory exists.
            if not isdir(self.p_dwld):
                mkdir(self.p_dwld)

            opener = URLopener()
            for name in self.files:
                logging.info('Start download {}.'.format(name))
                opener.retrieve(self.url + name, self.p_dwld + name)
                logging.info('Download {} done.'.format(name))
            return True

        except BaseException as err:
            logging.error('Download {}.'.format(err))
            self._rolback(sysctl, code)
Beispiel #23
0
def download_package(pkg_name, pkg_version):
    """Download a package archive from the PyPI mirror, verifying its md5.

    Returns True when the file is present with the expected checksum
    (already on disk or freshly downloaded), False on checksum mismatch
    or when no package info could be found.
    """
    file_name, path, expected_md5 = get_package_info(pkg_name, pkg_version)
    if not file_name:
        return False
    if os.path.isfile(file_name) and check_md5sum(file_name, expected_md5):
        print 'File with matching md5sum already exists, skipping {0}'.format(
            file_name)
        return True
    downloader = URLopener()
    pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
    print 'Downloading {0} from {1}'.format(file_name, pkg_url)
    downloader.retrieve(pkg_url, file_name)
    # Verify the freshly downloaded file before declaring success.
    if check_md5sum(file_name, expected_md5):
        return True
    else:
        print 'MD5 mismatch in file {0}.'.format(file_name)
        return False
Beispiel #24
0
def download_package(pkg_name, pkg_version):
  """Download a package archive from the PyPI mirror, verifying its digest.

  The hash algorithm and expected digest come from get_package_info().
  Returns True when the file is present with the expected digest
  (already on disk or freshly downloaded), False otherwise.
  """
  file_name, path, hash_algorithm, expected_digest = get_package_info(pkg_name,
      pkg_version)
  if not file_name:
    return False
  if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm,
      expected_digest):
    print 'File with matching digest already exists, skipping {0}'.format(file_name)
    return True
  downloader = URLopener()
  pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
  print 'Downloading {0} from {1}'.format(file_name, pkg_url)
  downloader.retrieve(pkg_url, file_name)
  # Verify the freshly downloaded file before declaring success.
  if check_digest(file_name, hash_algorithm, expected_digest):
    return True
  else:
    print 'Hash digest check failed in file {0}.'.format(file_name)
    return False
Beispiel #25
0
def startplayback_images(args):
    """Show a single image.

    Downloads args.url into <profile>/image.jpg (decoding the path on
    Python 2), resolves it as the playback item and starts a SlideShow
    on the cache directory.
    """
    # cache path
    sDir = xbmc.translatePath(args._addon.getAddonInfo("profile"))
    if args.PY2:
        sPath = join(sDir.decode("utf-8"), u"image.jpg")
    else:
        sPath = join(sDir, "image.jpg")

    # download image (renamed from 'file', which shadowed the py2 builtin)
    opener = URLopener()
    opener.retrieve(args.url, sPath)

    # display image
    item = xbmcgui.ListItem(getattr(args, "title", "Title not provided"), path=sPath)
    xbmcplugin.setResolvedUrl(int(args._argv[1]), True, item)
    xbmc.executebuiltin("SlideShow(" + sDir + ")")
Beispiel #26
0
def maybe_download_tesst_image(file_name):
    '''Download the given TestImages file if not in the directory

    file_name - name of file to fetch

    Image will be downloaded if not present to CP_EXAMPLEIMAGES directory.

    NOTE(review): the "tesst" typo is part of the public name and is
    kept so existing callers keep working.
    '''
    local_path = os.path.join(testimages_directory(), file_name)
    if not os.path.exists(local_path):
        url = testimages_url() + "/" + file_name
        try:
            URLopener().retrieve(url, local_path)
        except IOError, e:
            # This raises the "expected failure" exception.
            def bad_url(e=e):
                raise e

            unittest.expectedFailure(bad_url)()
Beispiel #27
0
    def call_remote(self, category, params):
        """Call the meetup API endpoint named by *category*.

        The root URL template is filled in with *category*; the query
        string from *params* plus the API key is appended.  The JSON
        response body is decoded and returned so the caller can
        interpret both the meta info and the results.
        """
        endpoint = self.root_url % (category)
        # Every call has to include key
        endpoint = endpoint + "?" + params + "&key=" + self.key
        response = URLopener().open(endpoint)
        return json.loads(response.read())
Beispiel #28
0
    def parse(self, uri=None, fh=None, str=None):
        """Parse a single XML document for this list.

        Accepts either a uri (uri or default if parameter not specified),
        or a filehandle (fh) or a string (str).

        Does not handle the case of sitemapindex+sitemaps.

        NOTE: the ``str`` parameter shadows the builtin of the same name;
        it is kept for backward compatibility with keyword callers.
        """
        if (uri is not None):
            try:
                fh = URLopener().open(uri)
            except IOError as e:
                # Must not call str(e) here: the ``str`` parameter shadows
                # the builtin and is None on this path, which turned the
                # useful IOError message into a TypeError.  %-formatting
                # stringifies the exception safely.
                raise Exception(
                    "Failed to load sitemap/sitemapindex from %s (%s)" % (uri, e))
        elif (str is not None):
            fh = StringIO.StringIO(str)
        if (fh is None):
            raise Exception("Nothing to parse")
        s = self.new_sitemap()
        s.parse_xml(fh=fh, resources=self,
                    capability=self.capability_name, sitemapindex=False)
        self.parsed_index = s.parsed_index
    def read_component_sitemap(self, sitemapindex_uri, sitemap_uri, sitemap,
                               sitemapindex_is_file):
        """Read a component sitemap of a Resource List with index

        Each component must be a plain sitemap (sitemapindex=False is
        passed to the parser); its resources are added to self.resources.
        Raises ListBaseIndexError on fetch or authority failures.
        """
        if (sitemapindex_is_file):
            if (not self.is_file_uri(sitemap_uri)):
                # Attempt to map URI to local file
                remote_uri = sitemap_uri
                sitemap_uri = self.mapper.src_to_dst(remote_uri)
                self.logger.info("Mapped %s to local file %s" %
                                 (remote_uri, sitemap_uri))
            else:
                # The individual sitemaps should be at a URL (scheme/server/path)
                # that the sitemapindex URL can speak authoritatively about
                if (self.check_url_authority and not UrlAuthority(
                        sitemapindex_uri).has_authority_over(sitemap_uri)):
                    raise ListBaseIndexError(
                        "The sitemapindex (%s) refers to sitemap at a location it does not have authority over (%s)"
                        % (sitemapindex_uri, sitemap_uri))
        try:
            fh = URLopener().open(sitemap_uri)
            self.num_files += 1
        except IOError as e:
            raise ListBaseIndexError(
                "Failed to load sitemap from %s listed in sitemap index %s (%s)"
                % (sitemap_uri, sitemapindex_uri, str(e)))
        # Get the Content-Length if we can (works fine for local files)
        try:
            self.content_length = int(fh.info()['Content-Length'])
            self.bytes_read += self.content_length
        except KeyError:
            # If we don't get a length then c'est la vie
            pass
        # NOTE(review): if Content-Length was missing above, this log line
        # reports a stale self.content_length from an earlier read -- confirm.
        self.logger.info("Reading sitemap from %s (%d bytes)" %
                         (sitemap_uri, self.content_length))
        component = sitemap.parse_xml(fh=fh, sitemapindex=False)
        # Copy resources into self, check any metadata
        for r in component:
            self.resources.add(r)
Beispiel #30
0
    def __init_from_url(self, name, url, nocache=False):
        """Fetch <url>/<name>.tbz into the package cache and init from it.

        Requires self.cache to be set; the download is skipped when the
        name is already cached.  Raises PkgUnfetchable when the cache is
        missing or the fetch fails.
        """
        # Fixed typo in the error message ("cahce" -> "cache").
        if not self.cache: raise PkgUnfetchable("cache object not specified")
        self._debug("__init_from_url %s/%s.tbz" % (url, name))
        # and not self.__init_from_cache(name):
        if name not in self.cache.pkgnames():
            self._debug("package name %s" % name)
            path = os.path.join(self.cache.cachepath, "%s.tbz" % name)
            self._debug("path: %s" % path)
            try:
                getfile = URLopener().retrieve(
                    "%s/%s.tbz" % (url, name),
                    filename=path,
                    reporthook=None  # reporthook(blocknum, bs, size)
                )
            except IOError:
                raise PkgUnfetchable("Can't fetch package %s from %s" % \
                        ( name, url))

            self.path = path
            self.__init_from_path(
                os.path.join(self.cache.cachepath, "%s.tbz" % name))
            self.cache.cache(self)