def __init__(self):
     """Create the logger and start a fresh CSV file holding only the header row.

     The target file is opened in ``'w'`` mode, so anything already stored at
     ``self.filename`` is discarded each time an instance is constructed.
     """
     # NOTE(review): the two MyLog arguments look like a logger name and a
     # log directory -- confirm against cd_log.MyLog.
     self.log = cd_log.MyLog('DataSaver', 'logs')
     self.filename = "../doc/origin_data/cd_lianjia-qingyangqu4.csv"

     # Column titles, in the exact order they are written to the file.
     column_titles = [
         "id", "小区名称", "所在区域", "总价", "单价",
         "房屋户型", "所在楼层", "建筑面积", "户型结构", "套内面积",
         "建筑类型", "房屋朝向", "建筑结构", "装修情况", "梯户比例",
         "配备电梯", "产权年限", "挂牌时间", "交易权属", "上次交易",
         "房屋用途", "房屋年限", "产权所属", "抵押信息", "房本备件",
     ]

     # newline='' leaves line-ending handling to the csv module.
     with open(self.filename, 'w', encoding='utf-8', newline='') as csv_file:
         csv.writer(csv_file, dialect='excel').writerow(column_titles)
Example #2
0
 def __init__(self):
     """Create the logger, the User-Agent pool and the default request headers.

     A single User-Agent is drawn at random from ``self.USER_AGENTS`` while the
     instance is being built and stored in ``self.headers``; nothing in this
     method re-draws it afterwards.
     """
     # NOTE(review): the two MyLog arguments look like a logger name and a
     # log directory -- confirm against cd_log.MyLog.
     self.log = cd_log.MyLog("HtmlDownloader", "logs")

     # Pool of browser identification strings.
     # NOTE(review): one entry ends right after "GTB5;" with no closing
     # parenthesis and one entry appears twice; both are kept unchanged so the
     # random selection behaves exactly as before.
     agent_pool = [
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0;   Acoo Browser; GTB5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;   SV1) ; InfoPath.1; .NET CLR 3.5.30729; .NET CLR 3.0.30618)",
         "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Acoo Browser 1.98.744; .NET CLR 3.5.30729)",
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Acoo Browser; GTB5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; InfoPath.1; .NET CLR 3.5.30729; .NET CLR 3.0.30618)",
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SV1; Acoo Browser; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; Avant Browser)",
         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; GTB5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; Maxthon; InfoPath.1; .NET CLR 3.5.30729; .NET CLR 3.0.30618)",
         "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; GTB5;",
         "Mozilla/4.0 (compatible; Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Acoo Browser 1.98.744; .NET CLR 3.5.30729); Windows NT 5.1; Trident/4.0)",
         "Mozilla/4.0 (compatible; Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB6; Acoo Browser; .NET CLR 1.1.4322; .NET CLR 2.0.50727); Windows NT 5.1; Trident/4.0; Maxthon; .NET CLR 2.0.50727; .NET CLR 1.1.4322; InfoPath.2)",
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Acoo Browser; GTB6; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; InfoPath.1; .NET CLR 3.5.30729; .NET CLR 3.0.30618)",
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; Acoo Browser; GTB5; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; InfoPath.1; .NET CLR 3.5.30729; .NET CLR 3.0.30618)",
         "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB6; Acoo Browser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
     ]
     self.USER_AGENTS = agent_pool

     # Draw the User-Agent once, then assemble the headers in their original
     # key order.
     chosen_agent = random.choice(self.USER_AGENTS)
     self.headers = {
         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         "Accept-Encoding": "gzip, deflate, br",
         "Accept-Language": "zh-CN,zh;q=0.9",
         "Cache-Control": "max-age=0",
         "User-Agent": chosen_agent,
         "Referer": "https://cd.lianjia.com/ershoufang/",
     }
 def __init__(self):
     """Create the logger and (re)create the output CSV with its header row.

     Opening the file in ``'w'`` mode truncates any existing file at
     ``self.filename``, so after construction the file contains exactly one
     line: the column titles.
     """
     # Logger object used for log output.
     self.log = cd_log.MyLog('DataSaver', 'logs')
     # Where the CSV is saved.
     self.filename = "../doc/origin_data/cd_lianjia-wuhouqu.csv"

     # Header titles, listed in the order of the fields to be collected.
     columns = (
         "id",
         "小区名称", "所在区域",
         "总价", "单价",
         "房屋户型", "所在楼层", "建筑面积", "户型结构", "套内面积",
         "建筑类型", "房屋朝向", "建筑结构", "装修情况",
         "梯户比例", "配备电梯", "产权年限",
         "挂牌时间", "交易权属", "上次交易",
         "房屋用途", "房屋年限", "产权所属", "抵押信息", "房本备件",
     )

     # The CSV header is written here, during initialisation.
     with open(self.filename, 'w', encoding='utf-8', newline='') as out:
         header_writer = csv.writer(out, dialect='excel')
         header_writer.writerow(columns)
 def __init__(self):
     """Create the logger; ``self.log`` is the only state set up here."""
     # NOTE(review): the two MyLog arguments look like a logger name and a
     # log directory -- confirm against cd_log.MyLog.
     parser_log = cd_log.MyLog('HtmlParser', 'logs')
     self.log = parser_log
Example #5
0
 def __init__(self):
     """Create the logger and one instance of each collaborator object.

     The collaborators are built in a fixed order (downloader, parser, URL
     manager, saver). Keep that order: their constructors may have side
     effects of their own.
     """
     spider_log = cd_log.MyLog('lianjia_spider', 'logs')
     self.log = spider_log

     # Each collaborator is constructed with no arguments and stored on the
     # instance under a short attribute name.
     self.downloader = HtmlDownloader()
     self.parser = HtmlParser()
     self.um = UrlManager()
     self.saver = DataSaver()