Exemple #1
0
def update_web_house_id(web_house_id, id):
    """
    修改房子网页id
    :param web_house_id:
    :param id:
    :return:
    """
    sql = """update house set web_house_id=%s, updated=%s where id=%s"""
    pool.commit(sql, [web_house_id, datetime.datetime.now(), id])
Exemple #2
0
 def parse(self, response):
     try:
         if not response.text:
             raise BaseException(u"需要切换代理")
         if not is_json(response.text):
             raise BaseException(u"需要切换代理")
         self.is_change_proxy = False
         if "GetRoomJson" in response._url:
             list_json_response = json.loads(
                 json.loads(response.text).get("d").replace("\\", "\\\\"))
             if not isinstance(list_json_response, list):
                 raise BaseException(u"返回数据错误:%s" % list_json_response)
             for item_json_data in list_json_response:
                 for item_room in item_json_data.get("rooms"):
                     if not item_room.get("location"):
                         continue
                     status = self.get_house_status(BuildingSpider,
                                                    item_room.get("status"))
                     key = item_room.get("id")
                     if int(self.dict_all_houses.get(key).get(
                             "status")) != status:
                         self.dict_all_houses.get(key)["status"] = status
                         self.list_house_key.append(key)
                         yield self.get_house_request()
             if len(self.list_house_key) == 0:
                 yield self.create_request()
         elif "GetRoomInfo" in response._url:
             json_response = json.loads(response.text)
             room = json.loads(json_response.get("d"))
             key = room.get("id")
             new_description = json.dumps(
                 dict(
                     json.loads(
                         self.dict_all_houses.get(key).get("description")),
                     **room))
             sql = """update house set status=%s, updated=%s, description=%s where id=%s"""
             pool.commit(sql, [
                 self.dict_all_houses.get(key).get("status"),
                 datetime.datetime.now(), new_description,
                 self.dict_all_houses.get(key).get("id")
             ])
             logger.info(u"修改房屋状态:%s" % room.get("location"))
             self.list_house_key.remove(key)
             if len(self.list_house_key) == 0:
                 yield self.create_request()
     except BaseException as e:
         if type(e) == CloseSpider:
             logger.warning(u"爬虫停止")
             raise CloseSpider()
         logger.warning(e)
         if e.message:
             logger.warning(e.message)
         self.is_change_proxy = True
         if "GetRoomJson" in response.request.url or "GetRoomInfo" in response.request.url:
             yield response.request
         else:
             yield self.create_request()
Exemple #3
0
def update_house_status(house_id, status):
    """
    更改房间出售情况
    :param house_id:
    :param status:
    :return:
    """
    sql = """update house set status=%s, updated=%s where id=%s"""
    param = [status, datetime.datetime.now(), house_id]
    pool.commit(sql, param)
Exemple #4
0
def update_building_count(building_id, total_count, sale_count):
    """
    更改大楼房子总数量,出售总数量
    :param building_id:
    :param total_count:
    :param sale_count:
    :return:
    """
    sql = """update building set total_count=%s, sale_count=%s, updated=%s where id=%s"""
    param = [total_count, sale_count, datetime.datetime.now(), building_id]
    pool.commit(sql, param)
Exemple #5
0
 def handle_building(self, origin_house_number):
     select_sql = """select count(1) from house where building_id=%s"""
     result = pool.find_one(select_sql, [self.building.get("id")])
     update_status = 4
     if int(result.get("count(1)")) == origin_house_number:
         update_status = 2
     update_sql = """update building set status=%s, updated=%s where status=1 and id=%s"""
     pool.commit(
         update_sql,
         [update_status,
          datetime.datetime.now(),
          self.building.get("id")])
Exemple #6
0
def update_building(pre_sale_number, id):
    """
    更改大楼预售许可证
    :param pre_sale_number:
    :param id:
    :return:
    """
    find_sql = """select * from building where id=%s"""
    result_find = pool.find_one(find_sql, [id])
    if result_find.get("per_sale_number"):
        return
    sql = """update building set pre_sale_number=%s where id=%s"""
    param = [pre_sale_number, id]
    pool.commit(sql, param)
Exemple #7
0
def update_real_estate_count(real_estate_id, house_total_count,
                             house_sell_out_count):
    """
    更改楼盘房子总数量,出售总数量
    :param real_estate_id:
    :param house_total_count:
    :param house_sell_out_count:
    :return:
    """
    sql = """update real_estate set house_total_count=%s, house_sell_out_count=%s, updated=%s where id=%s"""
    param = [
        house_total_count, house_sell_out_count,
        datetime.datetime.now(), real_estate_id
    ]
    pool.commit(sql, param)
 def save(self):
     sql = """
     insert into cart_package(jpe_name, chi_name, abbreviation, eng_name, punish_time, tags_rarity_id, created) 
     VALUES (%s, %s, %s, %s, %s, %s, %s)
     """
     param = [
         self.jpe_name, self.chi_name, self.abbreviation, self.eng_name,
         self.punish_time, self.tags_rarity_id, self.created
     ]
     return pool.commit(sql, param)
 def save(self):
     sql = """
     insert into cart_tags(chi_name, jap_name, eng_name, tags_chi_name, tags_eng_name)
     VALUES (%s, %s, %s, %s, %s)
     """
     param = [
         self.chi_name, self.jap_name, self.eng_name, self.tags_chi_name,
         self.tags_eng_name
     ]
     return pool.commit(sql, param)
Exemple #10
0
def get_house_attribute(internet_type, name):
    select_sql = """select * from house_attribute where chinese_name=%s and internet_type=%s"""
    result_house_attribute = pool.find_one(select_sql,
                                           param=[name, internet_type])
    if result_house_attribute:
        return result_house_attribute.get("id")
    else:
        insert_sql = """insert into house_attribute(chinese_name, internet_type, created) values(%s, %s, %s)"""
        return pool.commit(
            insert_sql, param=[name, internet_type,
                               datetime.datetime.now()])
Exemple #11
0
 def save(self):
     self.created = datetime.datetime.now()
     sql = """
     insert into cart(chi_name, eng_name, jpe_name, level, icon_url, effect, attack, defend, link, created,
      effect_detail, tags_type_id) 
     VALUE (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
     """
     list_param = (self.chi_name, self.eng_name, self.jpe_name, self.level,
                   self.icon_url, self.effect, self.attack, self.defend,
                   self.link, self.created, self.effect_detail,
                   self.tags_type_id)
     return pool.commit(sql, list_param)
def set_spider_now_page(new_page):
    sql = """
        update spider_conf set value = %s, updated=%s where name="spider_now_page"
    """
    param = [new_page, datetime.datetime.now()]
    pool.commit(sql, param)
 def save(self):
     sql = """insert into cart_detail(cart_id, cart_tags_id, created, cart_tags_eng_name) values (%s, %s, %s, %s)"""
     param = [self.cart_id, self.cart_tags_id, datetime.datetime.now(), self.cart_tags_eng_name]
     return pool.commit(sql, param)
Exemple #14
0
def update_region(region_id, now_page):
    sql = """update region set now_page=%s, updated=%s where id=%s"""
    param = [now_page, datetime.datetime.now(), region_id]
    pool.commit(sql, param)
Exemple #15
0
 def set_error_building_status(self, building_id):
     sql = """update building set status=3, updated=%s where id=%s and status=1"""
     pool.commit(sql, param=[datetime.datetime.now(), building_id])
Exemple #16
0
def save(sql, param):
    return pool.commit(sql, param)
Exemple #17
0
 def work(self):
     delete_logs()
     options = webdriver.ChromeOptions()
     options.add_argument("headless")
     web_driver_manager = WebDriverManager(1, "chrome", options)
     house_driver = web_driver_manager.get_web_driver(True)
     # 统计数据
     buliding_id = 0
     real_estate_id = 0
     while True:
         try:
             house = pool.find_one(self.base_select_sql)
             if not house:
                 logger.info(u"数据收集完成")
                 break
             if not house.get("web_house_id"):
                 continue
             house_driver.send_url(
                 (self.base_house_url % house.get("web_house_id")))
             # 截图整个网页
             house_driver.save_screenshot(self.save_image_url)
             # 保存图片
             img = house_driver.find_element_by_tag_name("img")
             location_img_url = self.save_image_url
             left = img.location.get("x")
             top = img.location.get("y")
             width = left + img.size.get("width")
             height = top + img.size.get("height")
             image = Image.open(location_img_url).crop(
                 (left, top, width, height))
             image.save(location_img_url)
             # 防止图片没有保存下来
             time.sleep(3)
             # 识别图片
             image_recognition = ImageRecognition(self.base_image_path,
                                                  self.save_image_url)
             expression, int_code = image_recognition.get_expression_code()
             # 发送验证码请求
             code_input = house_driver.find_element_by_id("txtCode")
             code_input.send_keys(int_code)
             house_driver.find_element_by_id("Button1").click()
             one_house_url = house_driver.current_url
             if "bid" in one_house_url:
                 # 保存成功的图片
                 image_recognition.save_success_image(
                     self.save_image_url, expression)
                 # 收集数据
                 one_house_soup = BeautifulSoup(house_driver.page_source,
                                                "html.parser")
                 if not one_house_soup.find("img"):
                     raise BaseException(u"无法获取房子数据")
                 one_house_data = unquote(
                     one_house_soup.find(
                         "img", attrs={
                             "id": "roomInfo_img"
                         }).attrs.get("src").split("text=")[1].replace(
                             "%u", "\\u").decode("unicode-escape"))
                 if not one_house_data:
                     raise BaseException(u"无法获取房子数据")
                 if one_house_data and "undefined-undefined" in one_house_data:
                     raise BaseException(u"无法获取房子数据")
                 json_data = json.loads(one_house_data)
                 # if json_data.get("HX") == u"其他":
                 #     continue
                 house_status = chinese_status.get(
                     json_data.get("FWZT")) if chinese_status.get(
                         json_data.get("FWZT")) else 7
                 inside_area = json_data.get("TNMJ")
                 built_area = json_data.get("JZMJ")
                 house_type = json_data.get("HX")
                 inside_price = json_data.get("NSDJ_TN")
                 built_price = json_data.get("NSDJ_JM")
                 pool.commit(self.base_update_sql, [
                     house_status, inside_area, built_area, house_type,
                     inside_price, built_price,
                     datetime.datetime.now(),
                     house.get("id")
                 ])
                 logger.info(u"thread:%s, %s:套内单价:%s, 套内面积:%s" %
                             (self.thread_no, house.get("door_number"),
                              inside_price, inside_area))
                 # 统计数据
                 # 不同大楼,此时统计该栋楼的数据
                 if buliding_id and buliding_id != house.get("buliding_id"):
                     sql_count_house = """select * from
                                   (select count(1) as sale_number from house where buliding_id=%s and status=2) as a, 
                                   (select count(1) as total_number from house where buliding_id=%s) as b, 
                                   (select count(1) as sold_number from house where `status` in (3,4,5) and buliding_id=%s) as c"""
                     result_count_house = pool.find_one(
                         sql_count_house,
                         [buliding_id, buliding_id, buliding_id],
                         sql_analysis=False)
                     sql_update_buliding = """update building set sale_residence_count=%s, total_count=%s, sale_count=%s, updated=%s where id=%s"""
                     pool.commit(sql_update_buliding, [
                         result_count_house[0], result_count_house[1],
                         result_count_house[2],
                         datetime.datetime.now(), buliding_id
                     ])
                     buliding_id = house.get("buliding_id")
                 # 不同楼盘,此时统计楼盘数据
                 if real_estate_id and real_estate_id != house.get(
                         "real_estate_id"):
                     sql_count_buliding = """select sum(sale_residence_count), sum(total_count), sum(sale_count) from building where real_estate_id=%s"""
                     result_count_buliding = pool.find_one(
                         sql_count_buliding, [real_estate_id])
                     sql_update_real_estate = """update real_estate set sale_count=%s, house_total_count=%s, house_sell_out_count=%s, updated=%s where id=%s"""
                     pool.commit(sql_update_real_estate, [
                         result_count_buliding.get(
                             "sum(sale_residence_count)"),
                         result_count_buliding.get("sum(total_count)"),
                         result_count_buliding.get("sum(sale_count)"),
                         datetime.datetime.now(), real_estate_id
                     ])
                     real_estate_id = house.get("real_estate_id")
                 if not buliding_id:
                     buliding_id = house.get("buliding_id")
                     real_estate_id = house.get("real_estate_id")
         except BaseException as e:
             logger.error(e)
             try:
                 web_driver_manager.destory_web_driver(
                     house_driver.get_id())
             except BaseException as e2:
                 print e2
                 command = u"taskkill /F /IM chromedriver.exe"
                 os.system(command)
             house_driver = web_driver_manager.get_web_driver(True)