Ejemplo n.º 1
0
 def handle_starttag(self, tag, attrs):
     """Process one start tag: record links, the base URL and the charset.

     * Tags listed in ``self.link_types`` have the attribute named there
       handed to ``self.process_link`` with any ``#fragment`` removed.
     * ``<base>`` replaces ``self.base`` with its ``href`` when present.
     * ``<meta http-equiv="content-type">`` updates ``self.doc_enc`` from
       the ``charset`` parameter of its ``content`` attribute.

     Args:
         tag: the element name.
         attrs: iterable of ``(name, value)`` attribute pairs; a value may
             be None when the attribute was written without a value.
     """
     attr_d = dict(attrs)
     # ``or ''`` covers attributes that are present but valueless (None);
     # a plain .get(name, '') would return None and break .strip()/.lower().
     title = (attr_d.get('title') or '').strip()
     if tag in self.link_types:
         target = attr_d.get(self.link_types[tag], "")
         if target:
             # Drop the fragment. A fragment-only target is still reported,
             # as an empty string.
             self.process_link(target.partition('#')[0], tag, title)
     elif tag == 'base':
         self.base = attr_d.get('href', self.base)
     elif (tag == 'meta' and
           (attr_d.get('http-equiv') or '').lower() == 'content-type'):
         ct = attr_d.get('content', None)
         if ct:
             # Only the parameters after the first ";" are needed here.
             params = ct.partition(";")[2]
             param_dict = {}
             for param in rh.split_string(
                 params, syntax.PARAMETER, r"\s*;\s*"
             ):
                 try:
                     a, v = param.split("=", 1)
                     param_dict[a.lower()] = rh.unquote_string(v)
                 except ValueError:
                     # e.g. no "=" in the parameter: keep the bare name
                     # with no value.
                     param_dict[param.lower()] = None
             self.doc_enc = param_dict.get('charset', self.doc_enc)
Ejemplo n.º 2
0
 def handle_starttag(self, tag, attrs):
     """Handle a start tag, collecting links, base URL and document charset.

     Link-bearing tags (the keys of ``self.link_types``) have the mapped
     attribute passed to ``self.process_link`` after its ``#fragment`` is
     cut off. A ``<base>`` tag overrides ``self.base`` when it carries an
     ``href``. A ``<meta http-equiv="content-type">`` tag sets
     ``self.doc_enc`` from the ``charset`` parameter of ``content``.

     Args:
         tag: the element name.
         attrs: iterable of ``(name, value)`` attribute pairs; a value may
             be None when the attribute was written without a value.
     """
     attr_d = dict(attrs)
     # A valueless attribute maps to None, so fall back to '' explicitly
     # instead of relying on the .get() default (used only for absent keys).
     title = (attr_d.get('title') or '').strip()
     http_equiv = (attr_d.get('http-equiv') or '').lower()
     if tag in self.link_types:
         target = attr_d.get(self.link_types[tag], "")
         if target:
             # Strip the fragment; "#only" targets are reported as "".
             target = target.split('#', 1)[0]
             self.process_link(target, tag, title)
     elif tag == 'base':
         self.base = attr_d.get('href', self.base)
     elif tag == 'meta' and http_equiv == 'content-type':
         ct = attr_d.get('content', None)
         if ct:
             # Everything after the first ";" is the parameter list.
             params = ct.partition(";")[2]
             param_dict = {}
             for param in rh.split_string(params, syntax.PARAMETER,
                                          r"\s*;\s*"):
                 try:
                     a, v = param.split("=", 1)
                     param_dict[a.lower()] = rh.unquote_string(v)
                 except ValueError:
                     # e.g. a parameter without "=": store the bare name.
                     param_dict[param.lower()] = None
             self.doc_enc = param_dict.get('charset', self.doc_enc)
Ejemplo n.º 3
0
 def test_split_string(self):
     """Check rh.split_string against a table of quoted-string cases.

     Each case supplies the input text, the list expected back, the item
     pattern and the separator pattern passed to rh.split_string.
     """
     quoted = syntax.QUOTED_STRING
     comma = r"\s*,\s*"
     cases = [
         ('"abc", "def"', ['"abc"', '"def"'], quoted, comma),
         (r'"\"ab", "c\d"', [r'"\"ab"', r'"c\d"'], quoted, comma),
     ]
     for case_no, (instr, expected_outlist, item, split) in enumerate(cases):
         # Re-initialise self.red before every case.
         self.red.__init__()
         outlist = rh.split_string(unicode(instr), item, split)
         message = "[%s] %s != %s" % (
             case_no, str(expected_outlist), str(outlist))
         self.assertEqual(expected_outlist, outlist, message)
Ejemplo n.º 4
0
 def test_split_string(self):
     """Run rh.split_string over each sample and compare with the expected list.

     A failing case reports its zero-based position in the sample table
     together with the expected and actual lists.
     """
     samples = (
         (
             '"abc", "def"',
             ['"abc"', '"def"'],
             syntax.QUOTED_STRING,
             r"\s*,\s*",
         ),
         (
             r'"\"ab", "c\d"',
             [r'"\"ab"', r'"c\d"'],
             syntax.QUOTED_STRING,
             r"\s*,\s*",
         ),
     )
     for position, sample in enumerate(samples):
         instr, expected_outlist, item, split = sample
         # self.red is re-initialised ahead of each sample.
         self.red.__init__()
         outlist = rh.split_string(unicode(instr), item, split)
         failure_text = "[%s] %s != %s" % (
             position, str(expected_outlist), str(outlist))
         self.assertEqual(expected_outlist, outlist, failure_text)