def test_encode_html(self):
    """utils encode_html (FAILS - DISABLED)

    Feeds a string containing ``<`` to ``encode_html`` and compares the
    result against the expected literal. The outcome is also printed so
    the actual value is visible in the test output.
    """
    # Open question from the original author: why does this not encode '<'?
    # NOTE(review): the expected literal below is the unencoded input, which
    # contradicts the "FAILS" remark above -- possibly it was meant to be the
    # entity-encoded form. Confirm against encode_html before re-enabling.
    from flexget.utils.tools import encode_html

    sample = '<3'
    expected = '<3'
    print(encode_html(sample))
    assert encode_html(sample) == expected
def filename_from_headers(self, entry, response):
    """Set ``entry['filename']`` from the response's content-disposition.

    The headers returned by ``response.info()`` are stringified, passed
    through ``encode_html``, and parsed with ``email.message_from_string``.
    If a filename is found it is passed through ``decode_html`` and stored
    in ``entry['filename']``, replacing any value already present.

    :param entry: Mapping-like object; its ``'filename'`` key is read
        (for logging) and written.
    :param response: Object whose ``info()`` result stringifies to the
        raw header text.
    :return: None. Returns early, after logging an error, when the
        headers cannot be parsed; returns without touching ``entry``
        when no filename is found.
    :raises AttributeError: Re-raised unchanged if header parsing raises it.
    """
    data = str(response.info())

    # Try to decode; afaik this is against the specs but some servers
    # send such header data anyway.
    try:
        data = data.decode('utf-8')
    except UnicodeError:
        try:
            data = unicode(data)
        except UnicodeError:
            # Best effort: carry on with the undecoded header text.
            pass
        else:
            log.debug('response info unicoded')
    else:
        log.debug('response info UTF-8 decoded')

    # Now we should have a unicode string; convert it into the format
    # where non-ascii chars are entities before handing it to the parser.
    data = encode_html(data)
    try:
        filename = email.message_from_string(data).get_filename(failobj=False)
    except (AttributeError, SystemExit, KeyboardInterrupt):
        # Rethrow the most common stuff before the catch-all. SystemExit and
        # KeyboardInterrupt are listed explicitly for interpreters where
        # they still derive from Exception.
        raise
    except Exception:
        # Catch-all for parser failures; narrowed from a bare ``except`` so
        # that other BaseException subclasses still propagate.
        log.error('Failed to decode filename from response: %r' % data)
        return

    if not filename:
        # get_filename returned the falsy failobj: no filename in headers.
        return

    filename = decode_html(filename)
    log.debug('Found filename from headers: %s' % filename)
    if 'filename' in entry:
        log.debug('Overriding filename %s with %s from content-disposition' % (entry['filename'], filename))
    entry['filename'] = filename