def __init__(self, feed_data):
    """Parse a course-list page and collect its courses in ``self.courses``.

    ``feed_data`` is passed unchanged to ``BeautifulSoup.__init__``. Every
    ``<tr>`` whose ``class`` matches ``info_tr.*`` and that contains a link
    contributes one dict: the result of ``urldecode`` applied to the link's
    ``href``, plus a ``'name'`` key. Rows without a link are skipped.

    ``self.courses`` ends up as a tuple of those dicts, in document order.
    """
    BeautifulSoup.__init__(self, feed_data)
    # Keep the attribute defined even if parsing below raises part-way.
    self.courses = tuple()

    courses = []
    for row in self.findAll(name="tr", attrs={"class": re.compile("info_tr.*")}):
        link = row.find(name='a')
        if link is None:
            # find() yields None when the row has no <a>; skip the row
            # instead of failing with AttributeError on link.get().
            continue
        course_dict = urldecode(link.get("href"))
        # Keep only the text before the second-to-last '('. Note that the
        # result is '' when the link text has fewer than two '('.
        course_dict['name'] = link.text.rpartition('(')[0].rpartition('(')[0].strip()
        #course_dict['homework'] = row.find(name="span", attrs={"class":"red_text"}).text.strip()
        courses.append(course_dict)

    # Build the tuple once rather than re-creating it on every iteration.
    self.courses = tuple(courses)
def __init__(self, throttle_value):
    """Initialise both base classes and reset the instance's state.

    ``throttle_value`` is stored as ``self.thread_count``; every other
    attribute starts from a fixed default.
    """
    # Both bases are initialised explicitly, the thread base first.
    threading.Thread.__init__(self)
    BeautifulSoup.__init__(self)

    # Value supplied by the caller.
    self.thread_count = throttle_value

    # Scalars and flags.
    self.pid = 0
    self.abort = False
    self.debug = False
    self.test_max = 645  # Used when self.debug = True

    # Three separate, initially empty lists.
    self.connections, self.denied_urls, self.found_urls = [], [], []
def __init__(self, throttle_value):
    """Set up the thread base, the soup base, and the default attributes.

    The single argument, ``throttle_value``, becomes ``self.thread_count``.
    """
    # Initialise each base class explicitly, in this order.
    for base in (threading.Thread, BeautifulSoup):
        base.__init__(self)

    self.pid = 0

    # Each list attribute gets its own fresh empty list.
    for list_attr in ("connections", "denied_urls", "found_urls"):
        setattr(self, list_attr, [])

    self.thread_count = throttle_value

    self.debug = False
    # Used when self.debug = True
    self.test_max = 645

    self.abort = False
def __init__(self, feed_data):
    """Parse a term-list page and collect its terms in ``self.terms``.

    ``feed_data`` is passed unchanged to ``BeautifulSoup.__init__``. Every
    ``<td class="common_c2">`` that contains a link whose ``href`` matches
    ``^MyCourse.jsp.*`` contributes one dict with two keys:

    * ``'name'`` -- the stripped link text
    * ``'url'``  -- the link's ``href`` value

    Cells without such a link are skipped. ``self.terms`` ends up as a
    tuple of those dicts, in document order.
    """
    BeautifulSoup.__init__(self, feed_data)
    # Keep the attribute defined even if parsing below raises part-way.
    self.terms = tuple()

    # The pattern does not change between cells, so compile it once.
    link_attrs = {'href': re.compile('^MyCourse.jsp.*')}

    terms = []
    for cell in self.findAll(name='td', attrs={'class': 'common_c2'}):
        link = cell.find(name='a', attrs=link_attrs)
        if link is None:
            # No matching link in this cell.
            continue
        terms.append({
            'name': link.text.strip(),
            'url': link.get('href'),
        })

    # Build the tuple once rather than re-creating it on every iteration.
    self.terms = tuple(terms)
def __init__(self, feed_data, itemtype):
    """Parse an item-list page and collect its rows in ``self.items``.

    ``feed_data`` is passed unchanged to ``BeautifulSoup.__init__``. Every
    ``<tr>`` whose ``class`` matches ``tr[12]`` and that contains a link
    contributes one dict: the result of ``urldecode`` applied to the link's
    ``href``, plus a ``'name'`` key holding the stripped link text. Rows
    without a link are skipped.

    ``itemtype`` selects which ``<td>`` cells (0-based) are also copied:

    * ``'homework'`` -- ``'is_submit'`` from cell 3, ``'time'`` from cell 1,
      ``'due_time'`` from cell 2
    * ``'notice'``   -- ``'time'`` from cell 3
    * ``'download'`` -- ``'time'`` from cell 4

    Any other ``itemtype`` adds no extra keys. A row with fewer cells than
    the index requested raises ``IndexError``.

    ``self.items`` ends up as a tuple of those dicts, in document order.
    """
    BeautifulSoup.__init__(self, feed_data)
    rows = self.findAll(name="tr", attrs={"class": re.compile("tr[12]")})
    # Keep the attribute defined even if parsing below raises part-way.
    self.items = tuple()

    items = []
    for row in rows:
        link = row.find("a")
        if link is None:
            # find() yields None when the row has no <a>; skip the row
            # instead of failing with AttributeError on link.get().
            continue
        item_dict = urldecode(link.get("href"))
        item_dict['name'] = link.text.strip()

        if itemtype == 'homework':
            # Look the cells up once instead of once per field.
            cells = row.findAll(name="td")
            item_dict['is_submit'] = cells[3].text.strip()
            item_dict['time'] = cells[1].text.strip()
            item_dict['due_time'] = cells[2].text.strip()
        elif itemtype == 'notice':
            item_dict['time'] = row.findAll(name="td")[3].text.strip()
        elif itemtype == 'download':
            item_dict['time'] = row.findAll(name="td")[4].text.strip()

        items.append(item_dict)

    # Build the tuple once rather than re-creating it on every iteration.
    self.items = tuple(items)
def __init__(self, *args, **kwargs):
    """Initialise the soup with an extended ``markupMassage`` default.

    The default is a copy of ``BeautifulSoup.MARKUP_MASSAGE`` extended with
    ``self.fixlist``. A ``markupMassage`` keyword supplied by the caller
    takes precedence over it. All other arguments are forwarded unchanged
    to ``BeautifulSoup.__init__``, whose result is returned.
    """
    # Work on a copy so the base-class list itself is never extended.
    massage_rules = copy(BeautifulSoup.MARKUP_MASSAGE)
    massage_rules.extend(self.fixlist)

    # Only fill in the default when the caller did not pass their own value.
    kwargs.setdefault('markupMassage', massage_rules)
    return BeautifulSoup.__init__(self, *args, **kwargs)
def load(self, data):
    """(Re)initialise this soup from ``data``, recording parse failures.

    ``data`` is first passed through ``replace_entities`` and the result is
    handed to ``BeautifulSoup.__init__``. If that raises, the failure is
    stored on ``self.error`` and the soup is initialised from an empty
    string instead:

    * ``UnicodeEncodeError`` -> ``self.error = PageUnicodeError()``
    * any other ``Exception`` -> ``self.error = PageParseError()``

    ``self.error`` is not touched when parsing succeeds. Exceptions raised
    by ``replace_entities`` itself are not caught here.
    """
    data = replace_entities(data)
    try:
        BeautifulSoup.__init__(self, data)
    except UnicodeEncodeError:
        self.error = PageUnicodeError()
        BeautifulSoup.__init__(self, "")
    except Exception:
        # Not a bare ``except:``: KeyboardInterrupt and SystemExit must
        # propagate rather than be recorded as a page parse error.
        self.error = PageParseError()
        BeautifulSoup.__init__(self, "")
def __init__ (self, src=None, path=None):
    """Initialise the soup from a markup string or from a file.

    src  -- markup text to parse; ignored when ``path`` is truthy.
    path -- name of a file whose whole content is read and used as the
            markup instead of ``src``.

    Every ``%`` in the markup is doubled to ``%%`` before it is handed to
    ``BeautifulSoup.__init__``.

    If ``path`` is falsy and ``src`` is left as ``None``, the ``replace``
    call raises ``AttributeError``.
    """
    if path:
        # Use a context manager so the file handle is closed promptly, and
        # ``open`` rather than the Python 2-only ``file`` builtin.
        with open(path) as handle:
            src = handle.read()
    # NOTE(review): the reason for doubling '%' is not visible here --
    # presumably the text is later used with %-formatting; confirm.
    src = src.replace('%','%%')
    BeautifulSoup.__init__ (self, src)
def __init__(self, *args, **kwargs):
    """Construct the instance by delegating to ``BeautifulSoup.__init__``.

    All positional and keyword arguments are forwarded unchanged; nothing
    else is set up here.
    """
    BeautifulSoup.__init__(self, *args, **kwargs)