예제 #1
0
파일: urlextract.py 프로젝트: emnh/homedir
 def __init__(self):
     """Initialise the base HTMLParser and start with an empty ``urls`` list.

     When the module-level ``badhtml`` flag is truthy the base initialiser
     receives a ``NullFormatter``; otherwise it is called with no extra
     arguments.
     """
     base_args = (NullFormatter(),) if badhtml else ()
     HTMLParser.__init__(self, *base_args)
     self.urls = []
예제 #2
0
 def __init__(self, attr=None, tags=None):
     """Store the attribute/tag selection and create the wrapped parser.

     attr -- stored as given; 'href' is used when None is passed.
     tags -- stored as given; ('a', ) is used when None is passed.
     """
     self.__attr = 'href' if attr is None else attr
     self.__tags = ('a', ) if tags is None else tags
     self.__P = HTMLParser(NullFormatter())
     self.__saved = []
     self.__tagHandlers = {}
예제 #3
0
 def compactor(dev_filename, rel_filename):
     """Validate an HTML file and write a minified copy of it.

     dev_filename -- path of the HTML source file to read.
     rel_filename -- path the minified output is written to.

     If parsing raises HTMLParseError, the message is passed to error()
     and the process exits with status 1.
     """
     # Use compactor to generate release version.
     echo('Compacting: %s -> %s' % (dev_filename, rel_filename))
     # Close the input handle deterministically instead of leaving it to
     # garbage collection.
     with open(dev_filename, 'r') as source_file:
         source_data = source_file.read()
     try:
         # Verify that the html file is correct
         htmlparser = HTMLParser(NullFormatter())
         htmlparser.feed(source_data)
         htmlparser.close()
         # Now try to minify. The with-block guarantees the output file is
         # closed even when minifying raises.
         with open(rel_filename, 'wb') as output_file:
             minifier = HTMLMinifier(output_file.write, True)
             minifier.feed(source_data)
             minifier.close()
     except HTMLParseError as e:
         error(str(e))
         exit(1)
예제 #4
0
#coding:utf-8

import urllib2
from htmllib import HTMLParser
from formatter import NullFormatter
import os
import re

# Fetch the page and print every anchor target that starts with "http".
url_name = "http://b.hatena.ne.jp/hotentry"
html_data = urllib2.urlopen(url_name)
parser = HTMLParser(NullFormatter())

try:
    parser.feed(html_data.read())
    # Force processing of any data the parser is still buffering so that
    # anchors near the end of the document are not lost.
    parser.close()
except TypeError:
    print "type error"
finally:
    # Release the HTTP response whether or not parsing succeeded.
    html_data.close()

# anchorlist holds the anchors the parser collected while parsing.
pat = re.compile("^http.*")
for link in parser.anchorlist:
    x = pat.search(link)
    if x is not None:
        print x.group(0)

예제 #5
0
 def __init__(self):
     """Initialise the base HTMLParser with a NullFormatter, then call clear_serv()."""
     null_formatter = NullFormatter()
     HTMLParser.__init__(self, null_formatter)
     self.clear_serv()
예제 #6
0
 def __init__(self):
     """Initialise the base HTMLParser with a fresh NullFormatter."""
     null_formatter = NullFormatter()
     HTMLParser.__init__(self, null_formatter)
예제 #7
0
 def __init__(self):
     """Initialise the base parser, the file-name regex and an empty link list.

     The regex captures, in the ``name`` group, the text after a '/' that
     ends in a dot followed by one of the alternatives in ``_IMAGE_EXT``.
     """
     HTMLParser.__init__(self, NullFormatter())
     pattern_head = r'.+/(?P<name>.+\.(?:'
     pattern_tail = ')).*$'
     extensions = "|".join(_IMAGE_EXT)
     self.reg = re.compile(pattern_head + extensions + pattern_tail)
     self.links = []
 def __init__(self):
     """Initialise the NullFormatter base and start with an empty ``result`` list."""
     NullFormatter.__init__(self)
     self.result = list()