def parse_a_page(url):
    """Fetch a page and collect a Bag for every ``<div class="details">``.

    The document at *url* is downloaded with ``urllib2`` and parsed with
    ``BeautifulSoup``.  Each matching div is converted to a string, has its
    newlines removed and every run of whitespace collapsed to one space,
    and the result is handed to ``Bag``.

    The function is best-effort: a failure while downloading or parsing is
    reported on stdout and the (empty) BagSet is returned; a failure while
    building one Bag is reported and that div is skipped.

    Args:
        url: Address of the page to scrape.

    Returns:
        The ``BagSet`` containing every Bag that could be built.
    """
    bags = BagSet()
    try:
        response = urllib2.urlopen(url)
        try:
            html_source = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()
        soup = BeautifulSoup(html_source)
        divs = soup.findAll('div', attrs={'class': 'details'})
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop a long-running polling loop.
        print('[ERROR][read source code] %s' % (sys.exc_info(),))
        return bags

    for div in divs:
        div_str = str(div).replace('\n', '')
        div_str = re.sub(r'\s+', ' ', div_str)
        try:
            bags.add(Bag(div_str))
        except Exception:
            # One malformed div must not discard the rest of the page.
            print('[ERROR][create bag obj] %s' % (sys.exc_info(),))
    return bags
return bags for div in divs: div_str = str(div) div_str = div_str.replace('\n', '') div_str = re.sub('\s+', ' ', div_str) try: bags.add( Bag(div_str) ) except: print '[ERROR][create bag obj]', sys.exc_info() return bags if __name__ == '__main__': last_bags = BagSet() new_bags = BagSet() fout = open("nm_sale_handbags.dat", "a") while True: cur_time = strftime("%Y-%m-%d %H:%M:%S", gmtime()) cur_bags = parse_a_page(URL_NM_SALE_BAG) for b in cur_bags.bags: if not last_bags.exist(b): new_bags.add(b) if len(new_bags.bags) > 0: print cur_time, 'Send email!' # Set up the email here send_email( str(new_bags) ) print >> fout, new_bags else: