Ejemplo n.º 1
0
        data["油耗"] = fuel_consum
        car_year = html.xpath(
            '//ul[@class="cd_m_info_desc"]/li[1]/span[1]/text()')
        data["年份"] = car_year
        return data

    def get_pro_data(self, html):
        """Placeholder for scraping the vehicle's quality inspection report.

        The original note marks this as the spot where the quality
        inspection report items are to be scraped.  Nothing is implemented
        yet: ``html`` is ignored and ``None`` is returned.
        """
        return None


if __name__ == '__main__':
    # Pop up to 2000 entries from the "shenzhen_youxin" queue, download each
    # page, parse it, and append the printed representation of the parsed
    # result to the "shenzhen_new_data" file, one record per line.
    r = redis_or.Redis_Data()
    h = master.Master_Spider("shenzhen")
    s = Slave_Spisder()
    # The context manager closes (and therefore flushes) the output file even
    # if the loop is interrupted, so already-written records are not lost.
    with open("shenzhen_new_data", "a", encoding="utf-8") as f:
        for _ in range(2000):
            # Reset per iteration so the error message below never refers to
            # an unbound name or to the entry of a previous iteration.
            data = None
            try:
                data = r.pop_data("shenzhen_youxin")
                # Queue entries are prefixed with a scheme before fetching.
                html = h.get_html("http://" + data)
                # time.sleep(1.2)  # optional throttle, currently disabled
                detai = s.parse_detail_data(html)
                print(detai)
                f.write(str(detai) + "\n")
            except Exception:
                # Best-effort crawl: report the failing entry and move on.
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C and
                # interpreter shutdown working.
                print("异常的连接", data)
Ejemplo n.º 2
0
import master
import redis_or
import slave

# Manual smoke test: pop one entry from the "test_car_urls" queue, download
# it and print the parsed detail data.
#
# Disabled snippet kept for reference -- presumably how the queue was filled:
#     a = master.Master_Spider("shenzhen")
#     html = a.get_html("https://www.xin.com/beijing/benchi/i3/")
#     urls = a.get_detail_url(html)
#     for url in urls:
#         q.set_into_data("test_car_urls", url)
q = redis_or.Redis_Data()
url = q.pop_data("test_car_urls")

# Fetch the page using the popped value exactly as stored in the queue.
spider = master.Master_Spider("shenzhen")
html = spider.get_html(url)

# Parse the downloaded page and show the extracted fields.
a = slave.Slave_Spisder()
data = a.parse_detail_data(html)
print(data)
import master
import redis_or
import slave
from lxml import etree

# Manual smoke test: pop ten entries from the "test_car_urls" queue, download
# each one over https and print the parsed detail data.
#
# Disabled snippet kept for reference -- presumably how the queue was filled:
#     a = master.Master_Spider("shenzhen")
#     html = a.get_html("https://www.xin.com/beijing/benchi/i3/")
#     urls = a.get_detail_url(html)
#     for url in urls:
#         q.set_into_data("test_car_urls", url)
q = redis_or.Redis_Data()
for i in range(1, 11):
    url = q.pop_data("test_car_urls")
    # print(url)

    # A new spider and parser are created for every entry.
    spider = master.Master_Spider("shenzhen")
    html = spider.get_html("https://" + url)
    # Debug output: show what kind of object the downloader returned.
    print(type(html))

    a = slave.Slave_Spisder()
    data = a.parse_detail_data(html)
    print(data)
Ejemplo n.º 4
0
        data["变速箱"] = transmiss
        fuel_mode = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[4]/span[2]/text()')
        data["燃油类型"] = fuel_mode
        drive_mode = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[5]/span[2]/text()')
        data["驱动形式"] = drive_mode
        fuel_consum = html.xpath('//div[@class="cd_m_i_pz"]/dl[3]/dd[6]/span[2]/text()')
        data["油耗"] = fuel_consum
        items = html.xpath('/html/body/div[2]/div[13]/div/div[4]/div[1]/dl[1]/dd[1]/span[2]')
        print(items)

        return data
        #以下爬取的是车辆的质量检测报告项目



if __name__ == '__main__':
    # Manual check: download one fixed detail page, parse it and print the
    # extracted fields.
    # The URL must start directly with the scheme; a quote character left
    # inside the string literal would be sent as part of the address.
    html = master.Master_Spider("shenzhen").get_html("https://www.xin.com/yrek41mkmg/che69941841.html?channel=a49b117c44837d110753e751863f53")
    a = Slave_Spisder()
    data = a.parse_detail_data(html)
    print(data)
    # Optional MongoDB persistence of the parsed record, currently disabled:
    # fir = mongo_or.Mongo_Data()
    # db = fir.create_database("XIAOMI")
    # coll = fir.create_collection("LAO_JAY",db)
    # fir.insert_data(data,coll)