Code example #1
0
File: crawler.py  Project: saintz666/BagSaleDetector
def parse_a_page(url):
    """Fetch *url* and parse each <div class="details"> listing into a BagSet.

    Returns an empty BagSet if the page cannot be fetched or parsed;
    individual Bag construction failures are logged and skipped so one
    malformed listing does not lose the rest of the page.
    """
    bags = BagSet()
    try:
        response = urllib2.urlopen(url)
        try:
            html_source = response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
        soup = BeautifulSoup(html_source)
        divs = soup.findAll('div', attrs={'class': 'details'})
    except Exception:
        # Narrow from bare except: a bare clause would also swallow
        # KeyboardInterrupt/SystemExit, making the caller's polling
        # loop impossible to interrupt.
        print '[ERROR][read source code]', sys.exc_info()
        return bags

    for div in divs:
        # Collapse all whitespace so the Bag parser sees one flat line.
        div_str = str(div).replace('\n', '')
        div_str = re.sub(r'\s+', ' ', div_str)
        try:
            bags.add( Bag(div_str) )
        except Exception:
            # Skip this malformed listing but keep processing the rest.
            print '[ERROR][create bag obj]', sys.exc_info()

    return bags
Code example #2
0
File: crawler.py  Project: saintz666/BagSaleDetector
        return bags
    
    # For each matched listing <div>, flatten its HTML onto a single
    # whitespace-normalized line before handing it to the Bag parser.
    for div in divs:
        div_str = str(div)
        div_str = div_str.replace('\n', '')
        div_str = re.sub('\s+', ' ', div_str)
        try:
            bags.add( Bag(div_str) )
        except:
            # NOTE(review): bare except also swallows KeyboardInterrupt;
            # a narrower `except Exception:` would be safer here.
            print '[ERROR][create bag obj]', sys.exc_info()

    return bags


# Polling loop: repeatedly scrape the sale page, diff the results against
# the bags seen on the previous pass, and email/log anything new.
if __name__ == '__main__':
    last_bags = BagSet()
    new_bags = BagSet()
    # Append mode so the on-disk history survives restarts.
    fout = open("nm_sale_handbags.dat", "a")
    while True:
        # Timestamp is GMT, formatted for the notification/log line.
        cur_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        cur_bags = parse_a_page(URL_NM_SALE_BAG)
        # Any bag absent from the previous poll counts as new.
        for b in cur_bags.bags:
            if not last_bags.exist(b):
                new_bags.add(b)

        if len(new_bags.bags) > 0:
            print cur_time, 'Send email!'
            # Set up the email here
            send_email( str(new_bags) )
            # Persist the newly seen bags to the data file as well.
            print >> fout, new_bags
        else: