コード例 #1
0
 def testDotPaths(self):
     # Exercise list() on a page whose links contain "." and ".."
     # path segments.
     #
     # Only links that end up below http://example.com/foo/ are
     # expected in the returned directory and file names; the links
     # pointing at the parent or at the listing itself are not.
     base_url = 'http://example.com/foo/'
     page = '''
     <html>
       <head>
         <title>Listing</title>
         <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
       </head>
       <body>
       <pre>
       <a href="../">Up a level</a>
       <a href="/foo/../">The same again</a>
       <a href="file1/../file2">file2</a>
       <a href=".">This directory</a>
       <a href="dir/.">A subdirectory</a>
       </pre>
     </html>
     '''
     # Mock the listing page itself plus the one file link.
     responses.add('GET', base_url, body=page)
     responses.add('HEAD', base_url + 'file2')
     log = logging.getLogger()
     http_walker = HTTPWalker(base_url, log)
     found_dirs, found_files = http_walker.list('/foo/')
     self.assertEqual(found_dirs, ['dir/'])
     self.assertEqual(found_files, ['file2'])
コード例 #2
0
    def testNonAsciiListing(self):
        # Check list() against a page served as bytes that contains a
        # UTF-8 encoded non-breaking space.
        #
        # The page lists its files in descending order and also links to
        # /elsewhere (outside the listing) and to /foo/file99 via an
        # absolute path; the expected file names are in ascending order
        # and include file99 but not /elsewhere.
        base_url = 'http://example.com/foo/'
        page = b'''
        <html>
          <head>
            <title>Listing</title>
            <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
          </head>
          <body>
          <p>A non-breaking space: \xc2\xa0</p>
          <p><a href="/elsewhere">Somewhere else on the site</a></p>
          <!-- intentionally unclosed anchor below -->
          <p><a href="/foo/file99">Absolute path</p>

          <pre>
          <a href="../">Parent directory</a>
          <a href="subdir1/">subdir 1</a>
          <a href="subdir2/">subdir 2</a>
          <a href="subdir3/">subdir 3</a>
          <a href="file3">file 3</a>
          <a href="file2">file 2</a>
          <a href="file1">file 1</a>
          </pre>
        </html>
        '''
        responses.add('GET', base_url, body=page)
        wanted_files = ['file1', 'file2', 'file3', 'file99']
        # Register a HEAD response for every file we expect back.
        for name in wanted_files:
            responses.add('HEAD', base_url + name)
        log = logging.getLogger()
        http_walker = HTTPWalker(base_url, log)
        found_dirs, found_files = http_walker.list('/foo/')
        self.assertEqual(found_dirs, ['subdir1/', 'subdir2/', 'subdir3/'])
        self.assertEqual(found_files, wanted_files)
コード例 #3
0
    def testSquidFtpListing(self):
        # Test that a Squid FTP listing can be parsed.
        content = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- HTML listing generated by Squid 2.5.STABLE12 -->
<!-- Wed, 06 Sep 2006 11:04:02 GMT -->
<HTML><HEAD><TITLE>
FTP Directory: ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/
</TITLE>
<STYLE type="text/css"><!--BODY{background-color:#ffffff;font-family:verdana,sans-serif}--></STYLE>
</HEAD><BODY>
<H2>
FTP Directory: <A HREF="/">ftp://ftp.gnome.org</A>/<A HREF="/pub/">pub</A>/<A HREF="/pub/GNOME/">GNOME</A>/<A HREF="/pub/GNOME/sources/">sources</A>/<A HREF="/pub/GNOME/sources/gnome-gpg/">gnome-gpg</A>/<A HREF="/pub/GNOME/sources/gnome-gpg/0.5/">0.5</A>/</H2>
<PRE>
<A HREF="../"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-dirup.gif" ALT="[DIRUP]"></A> <A HREF="../">Parent Directory</A>
<A HREF="LATEST-IS-0.5.0"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-link.gif" ALT="[LINK]"></A> <A HREF="LATEST-IS-0.5.0">LATEST-IS-0.5.0</A>. . . . . . . . . Sep 02 07:07         <A HREF="LATEST-IS-0.5.0;type=a"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-text.gif" ALT="[VIEW]"></A> <A HREF="LATEST-IS-0.5.0;type=i"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-box.gif" ALT="[DOWNLOAD]"></A> -> <A HREF="gnome-gpg-0.5.0.tar.gz">gnome-gpg-0.5.0.tar.gz</A>
<A HREF="gnome-gpg-0.5.0.md5sum"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-unknown.gif" ALT="[FILE]"></A> <A HREF="gnome-gpg-0.5.0.md5sum">gnome-gpg-0.5.0.md5sum</A> . . . . . Sep 02 06:58    115  <A HREF="gnome-gpg-0.5.0.md5sum;type=a"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-text.gif" ALT="[VIEW]"></A> <A HREF="gnome-gpg-0.5.0.md5sum;type=i"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-box.gif" ALT="[DOWNLOAD]"></A>
<A HREF="gnome-gpg-0.5.0.tar.bz2"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-compressed.gif" ALT="[FILE]"></A> <A HREF="gnome-gpg-0.5.0.tar.bz2">gnome-gpg-0.5.0.tar.bz2</A>. . . . . Sep 02 06:58     68K <A HREF="gnome-gpg-0.5.0.tar.bz2;type=i"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-box.gif" ALT="[DOWNLOAD]"></A>
<A HREF="gnome-gpg-0.5.0.tar.gz"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-tar.gif" ALT="[FILE]"></A> <A HREF="gnome-gpg-0.5.0.tar.gz">gnome-gpg-0.5.0.tar.gz</A> . . . . . Sep 02 06:58     81K <A HREF="gnome-gpg-0.5.0.tar.gz;type=i"><IMG border="0" SRC="http://squid:3128/squid-internal-static/icons/anthony-box.gif" ALT="[DOWNLOAD]"></A>
</PRE>
<HR noshade size="1px">
<ADDRESS>
Generated Wed, 06 Sep 2006 11:04:02 GMT by squid (squid/2.5.STABLE12)
</ADDRESS></BODY></HTML>
        '''
        listing_url = 'ftp://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/'
        responses.add('GET', listing_url, body=content)
        walker = HTTPWalker(listing_url, logging.getLogger())
        dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/')
        self.assertEqual(dirnames, [])
        self.assertEqual(filenames, ['LATEST-IS-0.5.0',
                                     'gnome-gpg-0.5.0.md5sum',
                                     'gnome-gpg-0.5.0.tar.bz2',
                                     'gnome-gpg-0.5.0.tar.gz'])
コード例 #4
0
    def testApacheListing(self):
        # Test that list() handles a standard Apache dir listing.
        content = '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
 <head>
  <title>Index of /pub/GNOME/sources/gnome-gpg/0.5</title>
 </head>
 <body>
<h1>Index of /pub/GNOME/sources/gnome-gpg/0.5</h1>
<pre><img src="/icons/blank.gif" alt="Icon " width="24" height="24"> <a href="?C=N;O=D">Name</a>                          <a href="?C=M;O=A">Last modified</a>      <a href="?C=S;O=A">Size</a>  <a href="?C=D;O=A">Description</a><hr><a href="/pub/GNOME/sources/gnome-gpg/"><img src="http://www.gnome.org/img/24x24/parent.png" alt="[DIR]" width="24" height="24"></a> <a href="/pub/GNOME/sources/gnome-gpg/">Parent Directory</a>                                   -

<a href="LATEST-IS-0.5.0"><img src="http://www.gnome.org/img/24x24/default.png" alt="[   ]" width="24" height="24"></a> <a href="LATEST-IS-0.5.0">LATEST-IS-0.5.0</a>               02-Sep-2006 08:58   81K
<a href="gnome-gpg-0.5.0.md5sum"><img src="http://www.gnome.org/img/24x24/default.png" alt="[   ]" width="24" height="24"></a> <a href="gnome-gpg-0.5.0.md5sum">gnome-gpg-0.5.0.md5sum</a>        02-Sep-2006 08:58  115
<a href="gnome-gpg-0.5.0.tar.bz2"><img src="http://www.gnome.org/img/24x24/archive.png" alt="[   ]" width="24" height="24"></a> <a href="gnome-gpg-0.5.0.tar.bz2">gnome-gpg-0.5.0.tar.bz2</a>       02-Sep-2006 08:58   68K
<a href="gnome-gpg-0.5.0.tar.gz"><img src="http://www.gnome.org/img/24x24/archive.png" alt="[   ]" width="24" height="24"></a> <a href="gnome-gpg-0.5.0.tar.gz">gnome-gpg-0.5.0.tar.gz</a>        02-Sep-2006 08:58   81K
<hr></pre>

<address>Apache/2.2.3 (Unix) Server at <a href="mailto:[email protected]">ftp.acc.umu.se</a> Port 80</address>
</body></html>
        '''
        listing_url = 'http://ftp.gnome.org/pub/GNOME/sources/gnome-gpg/0.5/'
        responses.add('GET', listing_url, body=content)
        expected_filenames = [
            'LATEST-IS-0.5.0',
            'gnome-gpg-0.5.0.md5sum',
            'gnome-gpg-0.5.0.tar.bz2',
            'gnome-gpg-0.5.0.tar.gz',
            ]
        for filename in expected_filenames:
            responses.add('HEAD', listing_url + filename)
        walker = HTTPWalker(listing_url, logging.getLogger())
        dirnames, filenames = walker.list('/pub/GNOME/sources/gnome-gpg/0.5/')
        self.assertEqual(dirnames, [])
        self.assertEqual(filenames, expected_filenames)
コード例 #5
0
 def testGarbageListing(self):
     # A response body of arbitrary non-HTML bytes should yield an
     # empty listing rather than upsetting the directory lister.
     base_url = 'http://example.com/foo/'
     junk = b'\x01\x02\x03\x00\xff\xf2\xablkjsdflkjsfkljfds'
     responses.add('GET', base_url, body=junk)
     log = logging.getLogger()
     http_walker = HTTPWalker(base_url, log)
     found_dirs, found_files = http_walker.list('/foo/')
     self.assertEqual(found_dirs, [])
     self.assertEqual(found_files, [])
コード例 #6
0
 def testNamedAnchors(self):
     # Check that the listing parser copes with named anchors, i.e.
     # <a> tags that carry a name attribute but no href.
     #
     # The page also has a "#top" fragment link; only file1 and dir1/
     # are expected in the result.
     base_url = 'http://example.com/foo/'
     page = '''
     <html>
       <head>
         <title>Listing</title>
       </head>
       <body>
       <a name="top"></a>
       <pre>
       <a href="file1">file1</a>
       <a href="dir1/">dir1/</a>
       <a href="#top">Go to top</a>
       </pre>
     </html>
     '''
     # Mock the listing page itself plus the one file link.
     responses.add('GET', base_url, body=page)
     responses.add('HEAD', base_url + 'file1')
     log = logging.getLogger()
     http_walker = HTTPWalker(base_url, log)
     found_dirs, found_files = http_walker.list('/foo/')
     self.assertEqual(found_dirs, ['dir1/'])
     self.assertEqual(found_files, ['file1'])