Example #1
0
def get_format_product(url, product_model):
    """Scrape one product-list page into a list of product dicts.

    The page is fetched with ``get_html(url)``.  Inside the element with
    class "mProList", every element with class "l" is treated as one product:
    its first ``<a>`` supplies ``product_index_href`` / ``product_index_name``
    and each ``<li>`` supplies one key/value pair (``<i>`` text -> ``<span>``
    text, both stripped).  Each product also receives an ``_id`` from
    ``get_next_id(product_model)`` and the module-level ``current_version``.

    Args:
        url: Address of the list page to scrape.
        product_model: Name passed to ``get_next_id`` and ``except_handler``.

    Returns:
        A list of product dicts.  The list is empty when the container is
        missing or when scraping fails; on failure the error is printed and
        ``except_handler(url, product_model)`` is called.
    """
    try:
        html_str = get_html(url)
        products_container = html_str.find(attrs={"class": "mProList"})
        if not products_container:
            # Previously this path fell off the end and returned None, which
            # breaks callers that iterate over the result.
            return []

        products = []
        for product_div in products_container.findAll(attrs={"class": "l"}):
            product_name_a = product_div.find("a")
            product_index_href = product_name_a.attrs["href"]
            product_index_name = product_name_a.text

            product = {}
            for product_descs_li in product_div.findAll("li"):
                key = product_descs_li.find("i").text
                value = product_descs_li.find("span").text
                product[key.strip()] = value.strip()

            product['product_index_href'] = product_index_href
            product['product_index_name'] = product_index_name
            product['_id'] = get_next_id(product_model)
            product['version'] = current_version
            products.append(product)
        return products
    except Exception as e:
        # Exception rather than BaseException, so Ctrl-C and SystemExit still
        # stop the crawl instead of being reported as a scraping error.
        print('错误:', e)
        except_handler(url, product_model)
        return []
def che360_engine_clean():
    """Copy che360 engine records into ``engine_model_detail`` with new keys.

    Reads ``che360_engine_model_detail`` ten documents at a time, ordered by
    ``_id``, for at most 100000 pages.  Each source document becomes a new
    document whose fields are looked up through ``field_map`` (missing source
    fields become ""), gets a fresh ``_id`` from
    ``get_next_id("engine_model_detail")``, is printed, and is inserted.
    Processing stops once a page yields no documents.
    """
    # (target key, source key) pairs, in the order the target is filled.
    field_map = (
        ("desc", "cell_model_name"),
        ("engine_model", "发动机:"),
        ("series", "系列:"),
        ("engine_supp", "发动机厂商:"),
        ("cylinders_num", "汽缸数:"),
        ("fuel_type", "燃料种类:"),
        ("cc", "排量:"),
        ("max_power", "最大输出功率:"),
        ("max_hp", "最大马力:"),
        ("engine_type", "发动机形式:"),
        ("nick_name", "nick_name"),
    )
    page_size = 10
    saw_rows = True
    for page in range(100000):
        cursor = che360_engine_model_detail.find({})
        cursor = cursor.sort('_id', pymongo.ASCENDING)
        cursor = cursor.skip(page * page_size).limit(page_size)
        if not saw_rows:
            # The previous page was empty, so there is nothing left to read.
            return
        saw_rows = False
        for data in cursor:
            saw_rows = True
            engine = {target: data.get(source, "")
                      for target, source in field_map}
            engine["_id"] = get_next_id("engine_model_detail")
            print(engine)
            print()
            engine_model_detail.insert_one(engine)
Example #3
0
def get_format_product_detail(parent_collection, detail_collection, version):
    """Scrape the parameter table of every product at ``version``.

    For each distinct ``product_index_href`` found in
    ``truck_parts_db[parent_collection]`` with the given ``version``:

    1. The "param" page is fetched ("index" in the href is replaced by
       "param" and the 360che product host is prepended).
    2. The table inside the element with class "parameter-detail" is read
       column by column.  Column 0 holds the row labels; every further
       column is one product variant.
    3. One document per variant is inserted into
       ``truck_parts_db[detail_collection]``.
    4. All parent documents with that href are set to ``version + 1``, so a
       later run with the same ``version`` no longer selects them.

    Args:
        parent_collection: Name of the collection holding the product index.
        detail_collection: Name of the collection receiving the details.
        version: Version value used to select parent documents.
    """
    model_hrefs = truck_parts_db[parent_collection].find({
        "version": version
    }, {
        "_id": 1,
        "product_index_href": 1,
        "version": 1
    }).distinct('product_index_href')

    for model in model_hrefs:
        model_param_url = model.replace("index", "param")
        url = "https://product.360che.com" + model_param_url
        print(url)
        html = get_html(url)
        product_detail_container = html.find(attrs={"class": "parameter-detail"})

        # The header row has one <th> per column, label column included.
        header_row = product_detail_container.find(
            "tr",
            attrs={
                "id": "fixed_top"
            },
        )
        product_detail_num = len(header_row.findAll("th"))

        # Variant columns are numbered from 1; column 0 is the label column.
        product_details = {i: {} for i in range(1, product_detail_num)}

        for row_data in product_detail_container.findAll("tr"):
            if row_data.get('id', "") == "fixed_top":
                header_cells = row_data.findAll("th")
                for i in range(1, product_detail_num):
                    cell_model_name = header_cells[i].find('a').string
                    product_details[i]["cell_model_name"] = cell_model_name
            if row_data.get('class', "") == ["param-row"]:
                # Look the cells up once per row, not once per column.
                cells = row_data.findAll("td")
                row_id = cells[0].text
                for i in range(1, product_detail_num):
                    # Short rows have no cell for the trailing columns.
                    if len(cells) > i:
                        value_content = cells[i]
                        if value_content:
                            value = value_content.find('div').text
                            product_details[i][row_id] = value.strip()

        # A separate loop name keeps the ``product_details`` dict intact;
        # the original rebound it to the last variant while iterating.
        for detail in product_details.values():
            # NOTE(review): the id sequence name is fixed to
            # 'filter_model_detail' whatever ``detail_collection`` is -
            # confirm this is intended.
            detail["_id"] = get_next_id('filter_model_detail')
            detail["parent_id"] = model
            detail["version"] = version
            # insert_one replaces the legacy Collection.insert, which PyMongo 4
            # removed; it also matches the other writers in this file.
            truck_parts_db[detail_collection].insert_one(detail)

        query = {"product_index_href": model}
        newvalues = {"$set": {"version": version + 1}}
        update_ret = truck_parts_db[parent_collection].update_many(query, newvalues)
        print(update_ret.modified_count)
Example #4
0
 def __init__(self, name, contact_email, description, image_url, zip_code,
              latitude, longitude, instructions, address, accepts_opened,
              city, state):
     """Store the supplied fields on the new instance.

     ``id`` is not a parameter; it is taken from ``utils.get_next_id()``.
     Every argument is kept unchanged on the attribute of the same name.
     """
     self.id = utils.get_next_id()
     # Attributes are set in the same order as the parameters are declared.
     self.name, self.contact_email = name, contact_email
     self.description, self.image_url = description, image_url
     self.zip_code, self.latitude, self.longitude = (
         zip_code, latitude, longitude)
     self.instructions, self.address = instructions, address
     self.accepts_opened = accepts_opened
     self.city, self.state = city, state
def eurocvbay_parts_clean():
    """Print a reduced form of every eurocvbay part record.

    Reads ``che360_eurocvbay_parts_clean`` ten documents at a time, ordered
    by ``_id``, for at most 100000 pages, and stops once a page yields no
    documents.  For each document a dict with a fresh ``_id`` (from
    ``get_next_id("filter_model_detail")``), ``desc`` and ``replaces`` is
    built and printed.  Nothing is written to the database here.
    """
    page_size = 10
    saw_rows = True
    for page in range(100000):
        cursor = che360_eurocvbay_parts_clean.find({})
        cursor = cursor.sort('_id', pymongo.ASCENDING)
        cursor = cursor.skip(page * page_size).limit(page_size)
        if not saw_rows:
            # The previous page was empty, so there is nothing left to read.
            return
        saw_rows = False
        for data in cursor:
            saw_rows = True
            part = {
                "_id": get_next_id("filter_model_detail"),
                "desc": data.get("product_name", ""),
                "replaces": data.get("replaces", ""),
            }
            print(part)
            print()
def che360_truck_clean():
    """Copy che360 truck records into ``truck_model_detail`` with new keys.

    Reads ``che360_truck_model_detail`` ten documents at a time, ordered by
    ``_id``, for at most 100000 pages, and stops once a page yields no
    documents.  Each source document becomes a new document whose fields are
    looked up through ``field_map`` (missing source fields become ""), gets
    a fresh ``_id`` from ``get_next_id("truck_model_detail")``, is printed,
    and is inserted.

    Two mappings differ from the earlier version of this function:

    * the wheelbase ("轴距:") now goes to ``wheelbase``; it used to be
      written to ``desc`` and so replaced the model description;
    * the spring-leaf count ("弹簧片数:") now goes to ``plate_spring_num``,
      the key ``cn357_truck_clean`` uses; it used to be written to
      ``tyre_num`` and so replaced the tyre count.
    """
    # (target key, source key) pairs, in the order the target is filled.
    field_map = (
        ("desc", "cell_model_name"),
        ("announcement_model", "公告型号:"),
        ("drive_model", "驱动形式:"),
        ("wheelbase", "轴距:"),
        ("engine_model", "发动机:"),
        ("transmission_model", "变速箱:"),
        ("length", "车身长度:"),
        ("width", "车身宽度:"),
        ("height", "车身高度:"),
        ("weight", "整车重量:"),
        ("capacity_kg", "额定载重:"),
        ("tonnage_level", "吨位级别:"),
        ("engine_brand", "发动机品牌:"),
        ("cylinders_num", "汽缸数:"),
        ("fuel_type", "燃料种类:"),
        ("cc", "排量:"),
        ("max_hp", "最大马力:"),
        ("max_power", "最大输出功率:"),
        ("engine_type", "发动机形式:"),
        ("transmission_brand", "变速箱品牌:"),
        ("forward_gears_num", "前进挡位:"),
        ("reverse_gears_num", "倒挡数:"),
        ("tyre_type", "轮胎规格:"),
        ("tyre_num", "轮胎数:"),
        ("plate_spring_num", "弹簧片数:"),
        ("brand", "brand_name"),
        ("model", "model_name"),
    )
    page_size = 10
    has_next = True
    for i in range(0, 100000):
        coll = che360_truck_model_detail.find({}).sort(
            '_id', pymongo.ASCENDING).skip(i * page_size).limit(page_size)
        if not has_next:
            # The previous page was empty, so there is nothing left to read.
            return
        has_next = False
        for data in coll:
            has_next = True
            truck = {target: data.get(source, "")
                     for target, source in field_map}
            truck["_id"] = get_next_id("truck_model_detail")
            print(truck)
            print()
            truck_model_detail.insert_one(truck)
def cn357_truck_clean():
    """Copy cn357 truck records into ``truck_model_detail`` with new keys.

    Reads ``cn357_truck_model_detail`` ten documents at a time, ordered by
    ``_id``, for at most 10000 pages, and stops once a page yields no
    documents.  Each source document becomes a new document whose fields are
    looked up through ``field_map`` (missing source fields become ""), gets
    a fresh ``_id`` from ``get_next_id("truck_model_detail")``, is printed,
    and is inserted.
    """
    # (target key, source key) pairs, in the order the target is filled.
    field_map = (
        ("desc", "product"),
        ("announcement_model", "整车公告:"),
        ("brand", "品牌:"),
        ("series", "车系:"),
        ("purpose", "用途:"),
        ("drive_model", "驱动方式:"),
        ("tonnage_level", "吨位级别:"),
        ("manufacturer", "生产厂家:"),
        ("origin", "整车产地:"),
        ("weight", "整车重量:"),
        ("length", "整车长度:"),
        ("width", "整车宽度:"),
        ("height", "整车高度:"),
        ("member", "准乘人数:"),
        ("engine_model", "发动机型号:"),
        ("engine_type", "发动机形式:"),
        ("max_power", "最大功率:"),
        ("max_hp", "最大马力:"),
        ("cc", "排量:"),
        ("fuel_type", "燃油种类:"),
        ("transmission_model", "变速箱型号:"),
        ("forward_gears_num", "前进档位数:"),
        ("reverse_gears_num", "倒档档位数:"),
        ("chassis_models", "底盘型号:"),
        ("plate_spring_num", "板簧片数:"),
        ("tyre_num", "轮胎数量:"),
        ("tyre_type", "轮胎规格:"),
    )
    page_size = 10
    saw_rows = True
    for page in range(10000):
        cursor = cn357_truck_model_detail.find({})
        cursor = cursor.sort('_id', pymongo.ASCENDING)
        cursor = cursor.skip(page * page_size).limit(page_size)
        if not saw_rows:
            # The previous page was empty, so there is nothing left to read.
            return
        saw_rows = False
        for data in cursor:
            saw_rows = True
            truck = {target: data.get(source, "")
                     for target, source in field_map}
            truck["_id"] = get_next_id("truck_model_detail")
            print(truck)
            print()
            truck_model_detail.insert_one(truck)
def cn357_filter_clean():
    """Expand cn357 filter records into one document per model.

    Reads ``cn357_filter_model_detail`` ten documents at a time, ordered by
    ``_id``, for at most 100000 pages, and stops once a page yields no
    documents.  The "品牌:" (brand) and "型号:" (model) fields are reduced to
    their second colon-separated segment.  The model segment is then split
    on "/", and one document ``{"brand", "model", "_id"}`` is printed and
    inserted into ``filter_model_detail`` per model, each with a fresh
    ``_id`` from ``get_next_id("filter_model_detail")``.

    A field that is missing or contains no colon is treated as "" instead
    of raising IndexError and aborting the whole run.
    """
    page_size = 10
    has_next = True
    for i in range(0, 100000):
        coll = cn357_filter_model_detail.find({}).sort(
            '_id', pymongo.ASCENDING).skip(i * page_size).limit(page_size)
        if not has_next:
            # The previous page was empty, so there is nothing left to read.
            return
        has_next = False
        for data in coll:
            has_next = True
            brand = _second_colon_segment(data.get("品牌:", ""))
            models_str = _second_colon_segment(data.get("型号:", ""))
            for model in models_str.split("/"):
                # A new dict per model, so no inserted document shares state
                # with another one.
                record = {
                    "brand": brand,
                    "model": model,
                    "_id": get_next_id("filter_model_detail"),
                }
                print(record)
                filter_model_detail.insert_one(record)


def _second_colon_segment(text):
    """Return the second ":"-separated segment of *text*, or "" if none."""
    segments = text.split(":")
    return segments[1] if len(segments) > 1 else ""
Example #9
0
def get_format_product(url, product_model):
    """Scrape one "products-list" page into a list of product dicts.

    The page is fetched with ``get_html(url)``.  Every ``<li>`` of the first
    element with class "products-list" is one product.  Its ``<h5><a>``
    supplies ``product_index_href`` / ``product_index_name``.  Each ``<p>``
    inside the first "content" element's ``<span>`` supplies one key/value
    pair: the key is the text that starts the ``<p>``'s inner ``<span>`` and
    the value is the text that follows that ``<span>``, both stripped.  Each
    product also receives an ``_id`` from ``get_next_id(product_model)`` and
    the module-level ``current_version``.

    Args:
        url: Address of the list page to scrape.
        product_model: Name passed to ``get_next_id``.

    Returns:
        A list with one dict per product, in page order.
    """
    page = get_html(url)
    listing = page.findAll(attrs={"class": "products-list"})[0]

    def parse_item(product_li):
        # Turn a single <li> into its product dict.
        title_link = product_li.find("h5").find("a")
        href = title_link.attrs["href"]
        name = title_link.text
        content_block = product_li.findAll(attrs={"class": "content"})[0]
        content_span = content_block.find("span")

        product = {}
        for paragraph in content_span.findAll("p"):
            label = paragraph.find("span")
            value = label.next_sibling.strip()
            product[label.next_element.strip()] = value

        product.update({
            'product_index_href': href,
            'product_index_name': name,
            '_id': get_next_id(product_model),
            'version': current_version,
        })
        return product

    return [parse_item(item) for item in listing.findAll("li")]
def che360_filter_clean():
    """Copy che360 air-filter records into ``filter_model_detail``.

    Reads ``che360_air_filter_detail`` ten documents at a time, ordered by
    ``_id``, for at most 100000 pages, and stops once a page yields no
    documents.  Each source document becomes a new document that starts with
    a fresh ``_id`` from ``get_next_id("filter_model_detail")``.  It is then
    filled from three tables: plain fields copied as-is, fields split on "/"
    into lists, and two trailing plain fields.  Missing source fields count
    as "".  The document is printed and inserted.
    """
    # (target key, source key) pairs copied unchanged.
    plain_fields = (
        ("desc", "cell_model_name"),
        ("type", "类型:"),
        ("model", "滤清器型号:"),
        ("diameter", "直径:"),
        ("height", "高度:"),
        ("weight", "重量:"),
        ("leakproof_type", "密封结构:"),
        ("locating_hole_diameter", "定位孔直径:"),
        ("size_model", "尺寸型号:"),
        ("market_model", "市场型号:"),
        ("flux", "流量:"),
        ("filter_level", "过滤级别:"),
        ("thread_size", "螺纹尺寸:"),
    )
    # (target key, source key) pairs whose value is split on "/".
    # NOTE(review): "adaptable_enign_models" looks like a misspelling of
    # "engine"; it is kept because stored documents already use this key.
    list_fields = (
        ("adaptable_truck_models", "适用车型:"),
        ("adaptable_enign_models", "适用机型:"),
        ("adaptable_truck_types", "适用车类型:"),
        ("adaptable_engine_types", "适用发动机类型:"),
    )
    # Plain fields that come after the list fields in the document.
    trailing_fields = (
        ("alternative_parts_model", "可替换滤清器型号:"),
        ("nick_name", "可替换滤清器零件号:"),
    )
    page_size = 10
    saw_rows = True
    for page in range(100000):
        cursor = che360_air_filter_detail.find({})
        cursor = cursor.sort('_id', pymongo.ASCENDING)
        cursor = cursor.skip(page * page_size).limit(page_size)
        if not saw_rows:
            # The previous page was empty, so there is nothing left to read.
            return
        saw_rows = False
        for data in cursor:
            saw_rows = True
            record = {"_id": get_next_id("filter_model_detail")}
            for target, source in plain_fields:
                record[target] = data.get(source, "")
            for target, source in list_fields:
                record[target] = data.get(source, "").split("/")
            for target, source in trailing_fields:
                record[target] = data.get(source, "")
            print(record)
            print()
            filter_model_detail.insert_one(record)
Example #11
0
File: loop.py Project: quatrix/rekt
 def create_session(self):
     """Begin a new session and derive its file paths.

     Stores the current Unix time (whole seconds) and an id from
     ``get_next_id()``, then sets the ".mp3" and ".json" paths for that id
     inside ``self.upload_dir`` and logs the id.
     """
     self.session_start_time = int(time.time())
     self.session_id = get_next_id()

     # Both file names are built from the session id.
     audio_name = "{}.mp3".format(self.session_id)
     self.session_file = os.path.join(self.upload_dir, audio_name)
     metadata_name = "{}.json".format(self.session_id)
     self.metadata_file = os.path.join(self.upload_dir, metadata_name)

     logging.info("created session, session id: %d", self.session_id)
Example #12
0
 def test_get_next_id(self):
     # Two consecutive calls are expected to return 1 and then 2.
     for expected in (1, 2):
         self.assertEqual(expected, get_next_id())
Example #13
0
    def create(self, user_id, text):
        """Save a new post and return its id.

        The id comes from ``get_next_id()``.  The returned value is read
        back from the saved ``db.Post`` object's ``post_id`` attribute.
        """
        new_id = get_next_id()
        post = db.Post(new_id, user_id, text)
        post.save()
        return post.post_id
Example #14
0
 def create_session(self):
     """Begin a new session and derive its file paths.

     Stores the current Unix time (whole seconds) and an id from
     ``get_next_id()``, then sets the ".mp3" and ".json" paths for that id
     inside ``self.upload_dir`` and logs the id.
     """
     self.session_start_time = int(time.time())
     self.session_id = get_next_id()

     # Both file names are built from the session id.
     audio_name = '{}.mp3'.format(self.session_id)
     self.session_file = os.path.join(self.upload_dir, audio_name)
     metadata_name = '{}.json'.format(self.session_id)
     self.metadata_file = os.path.join(self.upload_dir, metadata_name)

     logging.info('created session, session id: %d', self.session_id)