def getentry(self):
    """Return the FileHandler entry, named after the HTML <title> if found.

    The file behind the selector is fed to an HTMLTitleParser one line at
    a time until the parser reports a complete title or the file ends.
    When a complete title was seen, runs of whitespace in it are collapsed
    to single spaces and the result becomes the entry's name.

    Returns:
        The entry produced by FileHandler.getentry(), possibly renamed.
    """
    # Start with the entry from the parent.
    entry = FileHandler.getentry(self)
    parser = HTMLTitleParser()

    # Use a context manager so the handle is released even when reading
    # or parsing raises something other than a parse error.
    with self.vfs.open(self.getselector(), "rt") as fp:
        try:
            while not parser.gotcompletetitle:
                line = fp.readline()
                if not line:
                    # EOF before a complete title was seen.
                    break
                parser.feed(line)
            parser.close()
        # NOTE(review): HTMLParseError was removed from the standard
        # library HTML parser in Python 3.5 -- confirm which HTMLParser
        # module this file imports before relying on this clause.
        except HTMLParser.HTMLParseError:
            # Parse error?  Stop parsing.  We can still return a title if
            # the parse error happened after we got the title.
            pass

    # We stopped because of EOF, a complete title, or a parse error.
    if parser.gotcompletetitle:
        # Convert all whitespace sequences to a single space.  Removes
        # newlines, tabs, etc.  Good for presentation and for security.
        title = re.sub(r"[\s]+", " ", parser.titlestr)
        entry.setname(title)
    return entry
def test_file_handler(self):
    """FileHandler accepts the fixture selector and serves it as text.

    Checks that the handler claims the request, is not a directory,
    reports a "text/plain" entry of type "0", and writes "Test\\n".
    """
    file_handler = FileHandler(
        self.selector,
        "",
        self.protocol,
        self.config,
        self.stat_result,
        self.vfs,
    )

    # The handler must accept the request and must not look like a directory.
    self.assertTrue(file_handler.canhandlerequest())
    self.assertFalse(file_handler.isdir())

    # Entry metadata.
    result = file_handler.getentry()
    self.assertEqual(result.mimetype, "text/plain")
    self.assertEqual(result.type, "0")

    # Capture the bytes the handler writes and compare them as text.
    sink = io.BytesIO()
    file_handler.write(sink)
    written = sink.getvalue()
    self.assertEqual(written.decode(), "Test\n")
def test_file_handler_non_utf8(self):
    """FileHandler serves a selector whose name is not valid UTF-8.

    The selector is built by decoding the raw bytes with the
    "surrogateescape" error handler; the written payload is compared as
    bytes because it contains the same non-UTF-8 byte.
    """
    raw_selector = b"/\xAE.txt"
    self.selector = raw_selector.decode(errors="surrogateescape")

    file_handler = FileHandler(
        self.selector,
        "",
        self.protocol,
        self.config,
        self.stat_result,
        self.vfs,
    )

    # The handler must accept the request and must not look like a directory.
    self.assertTrue(file_handler.canhandlerequest())
    self.assertFalse(file_handler.isdir())

    # Entry metadata.
    result = file_handler.getentry()
    self.assertEqual(result.mimetype, "text/plain")
    self.assertEqual(result.type, "0")

    # Compare raw bytes: the payload is not decodable as UTF-8.
    sink = io.BytesIO()
    file_handler.write(sink)
    self.assertEqual(sink.getvalue(), b"Hello, \xAE!")
def getentry(self):
    """Build the entry for this file, using its HTML <title> as the name.

    The file is read in binary mode and fed line by line to an
    HTMLTitleParser until a complete title has been seen or the file is
    exhausted.  If a complete title was found, its whitespace runs are
    squeezed to single spaces and it is set as the entry name.

    Returns:
        The entry from FileHandler.getentry(), renamed when a title exists.
    """
    # The parent class supplies the base entry.
    entry = FileHandler.getentry(self)
    parser = HTMLTitleParser()

    with self.vfs.open(self.getselector(), "rb") as stream:
        while True:
            if parser.gotcompletetitle:
                break
            raw_line = stream.readline()
            if not raw_line:
                # End of file reached without a complete title.
                break
            # The PY3 HTML parser doesn't handle surrogateescape, so
            # undecodable bytes are replaced instead.
            text = raw_line.decode(errors="replace")
            parser.feed(text)
        parser.close()

    # Parsing ended on EOF or on a complete title; only the latter
    # changes the entry.
    if not parser.gotcompletetitle:
        return entry

    # Collapse every whitespace run (newlines, tabs, ...) into one space:
    # nicer to display and safer to emit.
    entry.setname(re.sub(r"[\s]+", " ", parser.titlestr))
    return entry