Example #1
                    f.write('\n')
    return add_items
            
          
if __name__ == "__main__":
    obj_spider = Spider()
    obj_sender = Sender()
    url = "http://s.dianping.com/event/nanjing" 

    while True:
        try:
            hour = int(time.strftime('%H'))
            print time.strftime('%Y%m%d %H:%M:%S')
            
            if 9 <= hour <= 18:
                new_data = obj_spider.craw(url)
                add_items = check(new_data)
                if len(add_items) > 0:
                    print "New items!!!"
                    title = time.strftime('%Y%m%d %H:%M:%S') + '--ubuntu'
                    content = "\n".join(add_items.values())
                    obj_sender.send_email(title, content)
                else:
                    print "No change."
                time.sleep(20)
                 
            elif hour < 8 or hour > 20:
                time.sleep(3598)
            else:
                time.sleep(1000)
        except Exception, e:
            # log the error and keep polling
            print e
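The top of this example is cut off; it depends on a check() helper that remembers previously seen items and returns only the new ones (the f.write('\n') and return add_items lines above are its tail). A minimal sketch under those assumptions, with seen_items.txt as a hypothetical file name and new_data assumed to be a dict mapping item ids to text:

import os

SEEN_FILE = 'seen_items.txt'  # hypothetical file name, not from the original

def check(new_data):
    # Load the ids recorded on previous runs.
    seen = set()
    if os.path.exists(SEEN_FILE):
        with open(SEEN_FILE) as f:
            seen = set(line.strip() for line in f)
    # Collect unseen items and append their ids to the file.
    add_items = {}
    with open(SEEN_FILE, 'a') as f:
        for key, value in new_data.items():
            if key not in seen:
                add_items[key] = value
                f.write(key)
                f.write('\n')
    return add_items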
Example #2
                f.write('\n')
    return add_items


if __name__ == "__main__":
    obj_spider = Spider()
    obj_sender = Sender()
    url1 = "https://hz.lianjia.com/ershoufang/rs%E5%8D%97%E5%B2%B8%E6%99%B6%E9%83%BD%E8%8A%B1%E5%9B%AD/"
    url2 = "https://hz.lianjia.com/ershoufang/rs%E9%A3%8E%E6%99%AF%E8%9D%B6%E9%99%A2/"
    while True:
        try:
            hour = int(time.strftime('%H'))
            print time.strftime('%Y%m%d %H:%M:%S')

            if 0 < hour < 24:
                new_data = obj_spider.craw(url1)
                new_data.extend(obj_spider.craw(url2))
                add_items = check(new_data)
                if len(add_items) > 0:
                    print "New items!!!"
                    title = time.strftime('%Y%m%d %H:%M:%S')
                    content = ''
                    for house in add_items:
                        content += house['title'] + house['price'] + house[
                            'house_info'] + '\n'
                    obj_sender.send_email(title, content)
                else:
                    print "No change."
                time.sleep(20)
            else:
                time.sleep(3600)
        except Exception, e:
            # log the error and keep polling
            print e
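Both examples above call obj_sender.send_email(title, content), but the Sender class itself is not shown. A minimal sketch using smtplib; the SMTP host, account, password and recipient below are placeholders, not values from the original projects:

import smtplib
from email.mime.text import MIMEText

class Sender(object):
    def __init__(self):
        # Placeholder connection details -- replace with real ones.
        self.host = 'smtp.example.com'
        self.user = 'me@example.com'
        self.password = 'app-password'
        self.to_addr = 'me@example.com'

    def send_email(self, title, content):
        msg = MIMEText(content, 'plain', 'utf-8')
        msg['Subject'] = title
        msg['From'] = self.user
        msg['To'] = self.to_addr
        server = smtplib.SMTP(self.host, 25)
        try:
            server.login(self.user, self.password)
            server.sendmail(self.user, [self.to_addr], msg.as_string())
        finally:
            server.quit()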
Example #3
from spider import Spider
from spiders.wdyw import spider as wdyw_spider

# This is the program entry point.
# The task also asks for an interactive console program; since that part is simple, it is not implemented here.

# Create the Spider object
spider = Spider()

# Register the Wuhan University news (武大要闻) crawler; you can implement new crawlers and register them on this spider object in the same way
spider.register('武大要闻', wdyw_spider)

# Enable all registered crawlers
spider.enable_all()

# Start crawling, sorting the results by date in reverse (descending) order
spider.craw(order='date', reverse=True)
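This example uses a registry-style Spider (register, enable_all, craw with sorting) whose implementation lives in the project's own spider module. A minimal sketch of such a class, assuming each registered crawler exposes a craw() function that returns a list of dicts containing the requested sort key (here 'date'):

class Spider(object):
    def __init__(self):
        self._crawlers = {}   # name -> crawler module or object
        self._enabled = set()

    def register(self, name, crawler):
        self._crawlers[name] = crawler

    def enable_all(self):
        self._enabled = set(self._crawlers)

    def craw(self, order=None, reverse=False):
        # Run every enabled crawler and merge the results.
        results = []
        for name in self._enabled:
            results.extend(self._crawlers[name].craw())
        # Optionally sort by the requested field, e.g. order='date'.
        if order is not None:
            results.sort(key=lambda item: item[order], reverse=reverse)
        return results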
Example #4
# coding: utf-8
from spider import Spider

if __name__ == '__main__':
    root_url = "https://baike.baidu.com/item/Python"
    spider = Spider()
    spider.craw(root_url)
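Here Spider.craw(root_url) is expected to crawl outward from the Baidu Baike page on Python. A minimal sketch of one way to do that: a breadth-first crawl that follows in-site /item/ links up to a small page limit. The link pattern, page limit and the print-as-output step are assumptions, not the original project's code:

import re
import requests

class Spider(object):
    def craw(self, root_url, max_pages=10):
        seen = set([root_url])
        queue = [root_url]
        while queue:
            url = queue.pop(0)
            try:
                html = requests.get(url, timeout=10).text
            except requests.RequestException:
                continue
            # Queue in-site encyclopedia links we have not visited yet.
            for link in re.findall(r'href="(/item/[^"]+)"', html):
                full_url = 'https://baike.baidu.com' + link
                if full_url not in seen and len(seen) < max_pages:
                    seen.add(full_url)
                    queue.append(full_url)
            print(url)   # stand-in for the project's own parse/output step
        return seen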