Ejemplo n.º 1
0
def get_tags_by_name(name, tags_eng_name=None):
    """Find one ``cart_tags`` row whose ``chi_name`` contains ``name``.

    :param name: text to look for inside ``chi_name``. Any ``%`` or ``_`` it
        contains acts as a SQL ``LIKE`` wildcard.
    :param tags_eng_name: optional; when truthy, the search is also limited
        to rows with exactly this ``tags_eng_name``.
    :return: whatever ``pool.find_one`` returns for the query (only the
        ``id`` column is selected).
    """
    # The wildcards must travel inside the bound value. A quoted '%%s%' in
    # the statement text is not a placeholder, so ``name`` was never bound
    # and the parameter list did not match the statement.
    pattern = "%" + name + "%"
    if tags_eng_name:
        sql = """select id from cart_tags where tags_eng_name=%s and chi_name like %s"""
        param = [tags_eng_name, pattern]
    else:
        sql = """select id from cart_tags where chi_name like %s"""
        param = [pattern]
    return pool.find_one(sql, param=param)
Ejemplo n.º 2
0
def get_building_sale_status(sale_building, real_estate_id):
    """
    Look up the sale figures of one building.

    :param sale_building: value matched against ``building.sale_building``
    :param real_estate_id: value matched against ``building.real_estate_id``
    :return: result of ``pool.find_one`` for the ``total_count``,
        ``sale_count`` and ``id`` columns
    """
    query = """select total_count, sale_count, id from building where sale_building=%s and real_estate_id=%s"""
    return pool.find_one(query, [sale_building, real_estate_id])
Ejemplo n.º 3
0
def get_house_status(door_number, real_estate_id, buliding_id, house_unit):
    """
    Look up the sale status of one house.

    :param door_number: value matched against ``house.door_number``
    :param real_estate_id: value matched against ``house.real_estate_id``
    :param buliding_id: value matched against ``house.buliding_id``
    :param house_unit: value matched against ``house.unit``
    :return: result of ``pool.find_one`` for the ``status``, ``id`` and
        ``web_house_id`` columns
    """
    query = """select status, id, web_house_id from house where door_number=%s and real_estate_id=%s and buliding_id=%s and unit=%s"""
    return pool.find_one(
        query, [door_number, real_estate_id, buliding_id, house_unit])
Ejemplo n.º 4
0
def get_building(building_id=None):
    """
    Fetch one ``building`` row by primary key.

    :param building_id: value matched against ``building.id``
    :return: result of ``pool.find_one`` for the query (all columns)
    """
    query = """select * from building where id=%s"""
    return pool.find_one(query, [building_id])
Ejemplo n.º 5
0
 def get_request_body(self):
     """
     Pick the building to request next and build the JSON request body.

     ``self.now_index`` is advanced by one when the current result of
     ``get_total_building()`` equals ``self.total_building``. The building
     found with ``self.building_sql`` is stored on ``self.building``.

     :return: JSON string ``{"buildingid": <web_building_id>}``
     :raises CloseSpider: when no building is found
     """
     if self.get_total_building() == self.total_building:
         self.now_index += 1
     building = pool.find_one(self.building_sql, [self.now_index])
     self.building = building
     if not building:
         logger.warning(u"爬虫停止")
         raise CloseSpider()
     return json.dumps({"buildingid": building.get("web_building_id")})
Ejemplo n.º 6
0
def get_house_attribute(internet_type, name):
    """Return the id of a house attribute, inserting it when it is missing.

    :param internet_type: value matched against / stored in
        ``house_attribute.internet_type``
    :param name: value matched against / stored in
        ``house_attribute.chinese_name``
    :return: the ``id`` of the existing row, otherwise the result of
        ``pool.commit`` for the insert (presumably the new row id; verify
        against the pool implementation)
    """
    existing = pool.find_one(
        """select * from house_attribute where chinese_name=%s and internet_type=%s""",
        param=[name, internet_type])
    if not existing:
        created = datetime.datetime.now()
        return pool.commit(
            """insert into house_attribute(chinese_name, internet_type, created) values(%s, %s, %s)""",
            param=[name, internet_type, created])
    return existing.get("id")
Ejemplo n.º 7
0
 def handle_building(self, origin_house_number):
     """
     Update the status of ``self.building`` after its houses were stored.

     The number of ``house`` rows for the building is compared with
     ``origin_house_number``: status 2 is written when they are equal,
     status 4 otherwise. Only a building whose status is still 1 is updated.

     :param origin_house_number: expected number of houses of the building
     """
     building_id = self.building.get("id")
     count_row = pool.find_one(
         """select count(1) from house where building_id=%s""",
         [building_id])
     stored_houses = int(count_row.get("count(1)"))
     new_status = 2 if stored_houses == origin_house_number else 4
     pool.commit(
         """update building set status=%s, updated=%s where status=1 and id=%s""",
         [new_status, datetime.datetime.now(), building_id])
Ejemplo n.º 8
0
 def get_request_body(self):
     """
     Load the next building and its houses, then build the JSON request body.

     The building found with ``self.base_sql`` is stored on
     ``self.building`` and ``self.now_db_index`` is advanced by one. The
     building's houses (``self.all_houses_sql``) are indexed by their
     ``web_house_id`` in ``self.dict_all_houses``.

     :return: JSON string ``{"buildingid": <web_building_id>}``
     :raises CloseSpider: when no building is found
     """
     building = pool.find_one(self.base_sql, [self.now_db_index])
     self.building = building
     if not building:
         logger.warning(u"爬虫停止")
         raise CloseSpider()
     self.now_db_index += 1
     houses = pool.find(self.all_houses_sql, [building.get("id")])
     self.dict_all_houses = {}
     self.dict_all_houses.update(
         (house.get("web_house_id"), house) for house in houses)
     return json.dumps({"buildingid": building.get("web_building_id")})
Ejemplo n.º 9
0
def get_tags_by_tags_eng_name(tags_eng_name,
                              value,
                              tags_eng_name2=None,
                              is_equal=False):
    """Return the ``cart_tags`` row for a category/value pair, creating it if needed.

    The category names are upper-cased. Unless the category is ``PACKAGE``,
    spaces and newlines are removed from ``value``. The row is looked up by
    exact ``chi_name`` in ``tags_eng_name`` first, then (when given) in
    either ``tags_eng_name`` or ``tags_eng_name2``. When nothing matches and
    ``dict_tags`` knows the category, a new ``CartTags`` record is saved and
    the lookup is repeated once.

    :param tags_eng_name: primary category name (case-insensitive).
    :param value: tag text; ``None`` or empty yields ``None``.
    :param tags_eng_name2: optional fallback category name.
    :param is_equal: unused; kept so existing callers keep working.
    :return: the matching row, or ``None`` when ``value`` is empty, or when
        the category is unknown and nothing matched.
    """
    tags_eng_name = tags_eng_name.upper()
    # Reject None/empty before calling string methods on it.
    if not value:
        return None
    if tags_eng_name != "PACKAGE":
        value = value.replace(" ", "").replace("\n", "")
        if not value:
            return None
    if tags_eng_name2:
        tags_eng_name2 = tags_eng_name2.upper()

    def _lookup():
        # Exact match in the primary category, then in either category.
        row = pool.find_one(
            """select * from cart_tags where tags_eng_name=%s and chi_name =%s""",
            [tags_eng_name, value])
        if row or not tags_eng_name2:
            return row
        return pool.find_one(
            """select * from cart_tags where tags_eng_name in (%s, %s) and chi_name =%s """,
            [tags_eng_name, tags_eng_name2, value])

    cart_tags = _lookup()
    if cart_tags:
        return cart_tags

    category = dict_tags.get(tags_eng_name)
    if not category:
        print(u"该分类不存在%s, %s" % (tags_eng_name, value))
        return None

    new_tag = CartTags()
    new_tag.chi_name = value
    new_tag.eng_name = value
    new_tag.jap_name = value
    new_tag.tags_chi_name = category.get("tags_chi_name")
    new_tag.tags_eng_name = category.get("tags_eng_name")
    new_tag.save()
    # Re-read once instead of recursing: if the saved row cannot be found,
    # recursion would insert and recurse again without bound.
    return _lookup()
Ejemplo n.º 10
0
def update_building(pre_sale_number, id):
    """
    Store a building's pre-sale permit number unless it already has one.

    :param pre_sale_number: permit number to write
    :param id: primary key of the ``building`` row
    :return: None
    """
    find_sql = """select * from building where id=%s"""
    result_find = pool.find_one(find_sql, [id])
    # Nothing to do when the row is missing or a permit number is already
    # stored. The key must match the ``pre_sale_number`` column written
    # below; the misspelt ``per_sale_number`` never matched it.
    if not result_find or result_find.get("pre_sale_number"):
        return
    sql = """update building set pre_sale_number=%s where id=%s"""
    param = [pre_sale_number, id]
    pool.commit(sql, param)
Ejemplo n.º 11
0
def get_real_estate_sale_status(real_estate_id=None, real_estate_name=None):
    """
    Look up the sale figures of a real estate by id, by name, or by both.

    :param real_estate_id: optional value matched against ``real_estate.id``
    :param real_estate_name: optional value matched against
        ``real_estate.name``
    :return: ``False`` when neither argument is truthy, otherwise the result
        of ``pool.find_one`` for the ``house_total_count`` and
        ``house_sell_out_count`` columns
    """
    if not real_estate_name:
        if not real_estate_id:
            return False
        return pool.find_one(
            """select house_total_count, house_sell_out_count from real_estate where id = %s""",
            [real_estate_id])
    if real_estate_id:
        return pool.find_one(
            """select house_total_count, house_sell_out_count from real_estate where id = %s and name=%s""",
            [real_estate_id, real_estate_name])
    return pool.find_one(
        """select house_total_count, house_sell_out_count from real_estate where  name=%s""",
        [real_estate_name])
Ejemplo n.º 12
0
def get_real_estate_statictics_data(real_estate_id):
    """Sum ``total_count`` and ``sale_count`` over a real estate's buildings.

    :param real_estate_id: value matched against ``building.real_estate_id``
    :return: result of ``pool.find_one`` for the two sums
    """
    query = """select sum(total_count), sum(sale_count) from building where real_estate_id=%s"""
    return pool.find_one(query, [real_estate_id])
Ejemplo n.º 13
0
 def get_total_building(self):
     """
     Count the buildings whose status is 1 or 4.

     :return: the ``count(1)`` value of the query row, or 0 when the query
         returns nothing
     """
     row = pool.find_one(
         """select count(1) from building where status in (1,4)""")
     return row.get("count(1)") if row else 0
Ejemplo n.º 14
0
def get(sql, param):
    """Run ``sql`` with ``param`` through ``pool.find_one`` and return its result.

    :param sql: statement text handed to the pool unchanged
    :param param: parameters handed to the pool unchanged
    """
    return pool.find_one(sql, param)
Ejemplo n.º 15
0
def get_cart_package_by_jpe_name(jpe_name):
    """Fetch one ``cart_package`` row by its ``jpe_name``.

    :param jpe_name: value matched against ``cart_package.jpe_name``
    :return: result of ``pool.find_one`` for the query (all columns)
    """
    return pool.find_one(
        """select * from cart_package where jpe_name=%s""",
        param=[jpe_name])
Ejemplo n.º 16
0
def get_tags_by_tags_eng_name_equal(tags_eng_name, value):
    """Fetch one ``cart_tags`` row by exact category and exact ``chi_name``.

    :param tags_eng_name: value matched against ``cart_tags.tags_eng_name``
    :param value: value matched against ``cart_tags.chi_name``
    :return: result of ``pool.find_one`` for the query (all columns)
    """
    query = """select * from cart_tags where tags_eng_name=%s and chi_name=%s"""
    return pool.find_one(query, [tags_eng_name, value])
Ejemplo n.º 17
0
def get_switch_activity(switch_code):
    """Return the ``status`` of a function switch.

    :param switch_code: value matched against
        ``sys_function_switch.switch_code``
    :return: the ``status`` column of the matching row, or ``None`` when no
        row matches
    """
    sql = """select status from sys_function_switch where switch_code=%s"""
    result = pool.find_one(sql, [switch_code])
    # An unknown switch code yields no row; report it as None instead of
    # failing on ``None.get``.
    if not result:
        return None
    return result.get("status")
Ejemplo n.º 18
0
def get_spider_now_page():
    """Return the ``value`` stored under ``spider_now_page`` in ``spider_conf``.

    :return: the ``value`` column of the row, or ``None`` when the row does
        not exist
    """
    sql = """
        select * from spider_conf where name="spider_now_page"
    """
    result = pool.find_one(sql)
    # The configuration row may be missing; report it as None instead of
    # failing on ``None.get``.
    if not result:
        return None
    return result.get("value")
Ejemplo n.º 19
0
def get_real_estate(real_estate_name, region):
    """Fetch a real estate's id and sale counters by name and region.

    :param real_estate_name: value matched against ``real_estate.name``
    :param region: value matched against ``real_estate.region``
    :return: result of ``pool.find_one`` for the ``id``,
        ``house_total_count`` and ``house_sell_out_count`` columns
    """
    query = """select id, house_total_count, house_sell_out_count from real_estate where name=%s and region=%s"""
    return pool.find_one(query, [real_estate_name, region])
Ejemplo n.º 20
0
 def handle_sql(self, sql, param=None):
     """Run ``sql`` with ``param`` through ``pool.find_one`` and return its result.

     :param sql: statement text handed to the pool unchanged
     :param param: optional parameters, passed as the ``param`` keyword
     """
     return pool.find_one(sql, param=param)
Ejemplo n.º 21
0
def get_building_statictics_data(buliding_id, real_estate_id):
    """Count all houses of a building and those of them with status 4.

    :param buliding_id: value matched against ``house.buliding_id``
    :param real_estate_id: value matched against ``house.real_estate_id``
    :return: result of ``pool.find_one`` for the ``total_count`` and
        ``sale_count`` columns
    """
    query = """select total_count, sale_count from (select count(id) as total_count from house where buliding_id=%s and real_estate_id=%s) as a, 
            (SELECT count(id) as sale_count from house where `status`=4 and buliding_id=%s and real_estate_id=%s) as b"""
    # Both sub-selects filter on the same pair of ids.
    return pool.find_one(query, [buliding_id, real_estate_id] * 2)
Ejemplo n.º 22
0
def get_cart_by_chi_name(chi_name):
    """Fetch one ``cart`` row by its ``chi_name``.

    :param chi_name: value matched against ``cart.chi_name``
    :return: result of ``pool.find_one`` for the query (all columns)
    """
    query = """select * from cart where chi_name = %s"""
    return pool.find_one(query, [chi_name])
Ejemplo n.º 23
0
 def work(self):
     """
     Scrape house detail pages one at a time and store the results.

     After ``delete_logs()`` a headless Chrome driver is obtained from a
     ``WebDriverManager``. Each loop iteration:

     * fetches one house with ``self.base_select_sql`` and stops when none
       is returned;
     * opens the house page, screenshots it, crops the screenshot to the
       ``img`` element found on the page and runs ``ImageRecognition`` on it;
     * types the recognised code into ``txtCode`` and clicks ``Button1``;
     * when the resulting URL contains ``"bid"``, parses the JSON embedded
       in the ``roomInfo_img`` image URL and writes it with
       ``self.base_update_sql``;
     * when the building (or real estate) differs from the previously
       processed one, recomputes the previous one's counters.

     Any exception is logged, the driver is destroyed and replaced, and the
     loop continues.
     """
     delete_logs()
     options = webdriver.ChromeOptions()
     options.add_argument("headless")
     web_driver_manager = WebDriverManager(1, "chrome", options)
     house_driver = web_driver_manager.get_web_driver(True)
     # Statistics bookkeeping: building / real estate of the houses being
     # processed; 0 until the first house has been stored.
     buliding_id = 0
     real_estate_id = 0
     while True:
         try:
             house = pool.find_one(self.base_select_sql)
             if not house:
                 logger.info(u"数据收集完成")
                 break
             # NOTE(review): nothing is updated before this `continue`;
             # presumably base_select_sql never returns such a row twice,
             # otherwise this loops forever - verify.
             if not house.get("web_house_id"):
                 continue
             house_driver.send_url(
                 (self.base_house_url % house.get("web_house_id")))
             # Take a screenshot of the whole page
             house_driver.save_screenshot(self.save_image_url)
             # Crop the screenshot to the image element and save it
             img = house_driver.find_element_by_tag_name("img")
             location_img_url = self.save_image_url
             left = img.location.get("x")
             top = img.location.get("y")
             # Despite their names these are the right and bottom edges of
             # the crop box, not a width and a height.
             width = left + img.size.get("width")
             height = top + img.size.get("height")
             image = Image.open(location_img_url).crop(
                 (left, top, width, height))
             image.save(location_img_url)
             # Guard against the image not having been saved yet
             time.sleep(3)
             # Recognise the image
             image_recognition = ImageRecognition(self.base_image_path,
                                                  self.save_image_url)
             expression, int_code = image_recognition.get_expression_code()
             # Submit the verification code
             code_input = house_driver.find_element_by_id("txtCode")
             code_input.send_keys(int_code)
             house_driver.find_element_by_id("Button1").click()
             one_house_url = house_driver.current_url
             if "bid" in one_house_url:
                 # Keep the image that was recognised successfully
                 image_recognition.save_success_image(
                     self.save_image_url, expression)
                 # Collect the data
                 one_house_soup = BeautifulSoup(house_driver.page_source,
                                                "html.parser")
                 if not one_house_soup.find("img"):
                     raise BaseException(u"无法获取房子数据")
                 # The house data is the `text=` part of the image URL;
                 # `%uXXXX` escapes are turned into `\uXXXX` and decoded.
                 one_house_data = unquote(
                     one_house_soup.find(
                         "img", attrs={
                             "id": "roomInfo_img"
                         }).attrs.get("src").split("text=")[1].replace(
                             "%u", "\\u").decode("unicode-escape"))
                 if not one_house_data:
                     raise BaseException(u"无法获取房子数据")
                 if one_house_data and "undefined-undefined" in one_house_data:
                     raise BaseException(u"无法获取房子数据")
                 json_data = json.loads(one_house_data)
                 # if json_data.get("HX") == u"其他":
                 #     continue
                 # Statuses missing from chinese_status are stored as 7.
                 house_status = chinese_status.get(
                     json_data.get("FWZT")) if chinese_status.get(
                         json_data.get("FWZT")) else 7
                 inside_area = json_data.get("TNMJ")
                 built_area = json_data.get("JZMJ")
                 house_type = json_data.get("HX")
                 inside_price = json_data.get("NSDJ_TN")
                 built_price = json_data.get("NSDJ_JM")
                 pool.commit(self.base_update_sql, [
                     house_status, inside_area, built_area, house_type,
                     inside_price, built_price,
                     datetime.datetime.now(),
                     house.get("id")
                 ])
                 logger.info(u"thread:%s, %s:套内单价:%s, 套内面积:%s" %
                             (self.thread_no, house.get("door_number"),
                              inside_price, inside_area))
                 # Statistics
                 # A different building: recompute the counters of the
                 # building processed so far
                 if buliding_id and buliding_id != house.get("buliding_id"):
                     sql_count_house = """select * from
                                   (select count(1) as sale_number from house where buliding_id=%s and status=2) as a, 
                                   (select count(1) as total_number from house where buliding_id=%s) as b, 
                                   (select count(1) as sold_number from house where `status` in (3,4,5) and buliding_id=%s) as c"""
                     # The result is read by position below
                     # (sale_number, total_number, sold_number).
                     result_count_house = pool.find_one(
                         sql_count_house,
                         [buliding_id, buliding_id, buliding_id],
                         sql_analysis=False)
                     sql_update_buliding = """update building set sale_residence_count=%s, total_count=%s, sale_count=%s, updated=%s where id=%s"""
                     pool.commit(sql_update_buliding, [
                         result_count_house[0], result_count_house[1],
                         result_count_house[2],
                         datetime.datetime.now(), buliding_id
                     ])
                     buliding_id = house.get("buliding_id")
                 # A different real estate: recompute the counters of the
                 # real estate processed so far
                 if real_estate_id and real_estate_id != house.get(
                         "real_estate_id"):
                     sql_count_buliding = """select sum(sale_residence_count), sum(total_count), sum(sale_count) from building where real_estate_id=%s"""
                     result_count_buliding = pool.find_one(
                         sql_count_buliding, [real_estate_id])
                     sql_update_real_estate = """update real_estate set sale_count=%s, house_total_count=%s, house_sell_out_count=%s, updated=%s where id=%s"""
                     pool.commit(sql_update_real_estate, [
                         result_count_buliding.get(
                             "sum(sale_residence_count)"),
                         result_count_buliding.get("sum(total_count)"),
                         result_count_buliding.get("sum(sale_count)"),
                         datetime.datetime.now(), real_estate_id
                     ])
                     real_estate_id = house.get("real_estate_id")
                 # First stored house: remember its building and real estate
                 if not buliding_id:
                     buliding_id = house.get("buliding_id")
                     real_estate_id = house.get("real_estate_id")
         # NOTE(review): BaseException also traps KeyboardInterrupt and
         # SystemExit, so the loop cannot be interrupted that way - confirm
         # this is intended.
         except BaseException as e:
             logger.error(e)
             # Replace the driver after any failure; if destroying it fails,
             # kill the chromedriver.exe processes with taskkill.
             try:
                 web_driver_manager.destory_web_driver(
                     house_driver.get_id())
             except BaseException as e2:
                 print e2
                 command = u"taskkill /F /IM chromedriver.exe"
                 os.system(command)
             house_driver = web_driver_manager.get_web_driver(True)