def __init__(self, the_page):
    """Set up the parser state and immediately parse ``the_page``.

    Both base classes are initialised explicitly, the per-page state is
    reset, and then the page is fed to the parser and split into
    sentences. All of this happens inside the constructor, so the page
    has already been processed by the time the constructor returns.

    Args:
        the_page: HTML markup handed straight to ``self.feed``.
    """
    # Each base initialiser is called by name, thread first, parser second.
    threading.Thread.__init__(self)
    HTMLParser.HTMLParser.__init__(self)

    self.sentencesplitter = SentenceSplitter()

    # Paragraph bookkeeping. The counter starts at -1 so that meeting the
    # first <p> tag brings it to 0.
    self.num_of_para = -1
    self.in_paragraph = False
    self.paraString = []
    self.paraSentances = []

    # Status flags; every one of them starts out False.
    self.end_of_intro = self.cant_find_page = self.gotimage = False

    # Parse the markup first, then turn what was collected into sentences.
    self.feed(the_page)
    self.parse_to_sentances()