Example #1
0
 def parse_year(self, response):
     """Yield one request per (model year, first-registration/mileage) pair.

     Reads the year links from the second child ``div`` of
     ``#choosecar1_div_choosecar``, registers each year through
     ``self.dao.get_car_id`` (level 4, parent ``response.meta['power_id']``)
     and then schedules a ``go_to_input_page`` request for every entry
     returned by ``self.make_mile_and_year_group(year_name)`` whose key is
     not already present in ``self.crawled_car_run``.

     Every request receives its own copy of the meta dict, so values set
     for one request can never be overwritten by a later loop iteration
     (this matches how ``parse_brand`` builds its meta).

     In debug mode the loop stops after the third year entry.
     """
     content_div = response.xpath('//div[@id="choosecar1_div_choosecar"]/div')[1]
     base_meta = response.meta
     timestamp = str(int(time.time()))
     for i, year in enumerate(content_div.xpath('./ul/li/a')):
         year_name = year.xpath('./@title').extract()[0]
         json_param = year.xpath('./@choosecarparam').extract()[0]
         car_id = self.dao.get_car_id(self.source_id, 4, base_meta['power_id'], year_name, json_param)
         param = 'json=' + json_param + '&_=' + timestamp
         for first_time in self.make_mile_and_year_group(year_name):
             # first_time[0] is stored as 'firsttime', first_time[1] as 'mile'.
             key = str(car_id) + '_' + first_time[0] + '_' + str(first_time[1])
             if key in self.crawled_car_run:
                 continue
             # Copy per request: sharing one dict would let later iterations
             # clobber the values an already-yielded request relies on.
             meta = dict(base_meta)
             meta['car_id'] = car_id
             meta['year'] = year_name
             meta['mile'] = first_time[1]
             meta['firsttime'] = first_time[0]
             meta['cookiejar'] = self.cookie_jar
             # A fresh jar number is consumed only for requests actually sent.
             self.cookie_jar += 1
             yield ants.Request(self.url + '?' + param + '&jar=' + str(meta['cookiejar']),
                                meta=meta,
                                callback='go_to_input_page')
         if self.debug and i > 1:
             break
Example #2
0
 def parse_words(self, response):
     """Schedule a ``parse_brand`` request for each entry of the brand chooser.

     The first ``li`` under ``div.brandChoose`` is skipped; every remaining
     item contributes its ``choosecarparam`` attribute as the ``json`` query
     value. In debug mode only the first request is produced.
     """
     chooser_items = response.xpath('//div[@class="brandChoose"]/ul/li')
     for item in chooser_items[1:]:
         choose_param = item.xpath('./a/@choosecarparam').extract()[0]
         cache_buster = str(int(time.time()))
         target = ''.join([self.url, '?', 'json=', str(choose_param), '&_=', cache_buster])
         yield ants.Request(target, callback='parse_brand')
         if self.debug:
             break
Example #3
0
 def input_mile(self, response):
     """Return the step-2 request carrying the mileage and first-time values.

     The URL is built from ``self.domain`` plus the ``mile``, ``firsttime``
     and ``cookiejar`` entries of ``response.meta``; the response is handled
     by ``self.parse_service`` and the same meta dict is passed along.
     """
     meta = response.meta
     url_parts = [
         self.domain,
         '/baoyang/step2.html?curmileage=',
         str(meta['mile']),
         '&firsttime=',
         meta['firsttime'],
         '&jar=',
         str(meta['cookiejar']),
     ]
     return ants.Request(''.join(url_parts), self.parse_service, meta=meta)
Example #4
0
 def parse_service(self, response):
     """Record the services listed on the step-2 page and start adding them.

     Three groups of ``delist`` blocks are processed, numbered 1..3 in this
     order: the blocks under ``div.tableSetp2``, then those under the second
     and the fourth ``#dd_tablestep2mt`` element. For every table row the
     service is registered via ``self.dao.get_car_service_id`` and linked to
     the current car/firsttime/mile via ``self.dao.get_car_run_service_id``.

     Side effects on ``response.meta``:
       * ``service_list``: one list of ``"<id>_<second value>"`` strings per
         ``delist`` block,
       * ``next_service``: set to 1 (index of the next batch to submit),
       * ``p_name_service_id_map``: maps ``"<id>_<part>"`` (second value split
         on ``-``) to the run-service id.

     Returns the request that submits the first batch; ``add_service``
     handles the response.
     """
     delist_query = './div[contains(@class,"delist")]'
     groups = [response.xpath('//div[@class="tableSetp2"]/div[contains(@class,"delist")]')]
     step2_tables = response.xpath('//div[@id="dd_tablestep2mt"]')
     for table_index in (1, 3):
         groups.append(step2_tables[table_index].xpath(delist_query))

     batches = []
     run_service_by_p_name = {}
     service_type = 0
     for details in groups:
         service_type += 1
         for detail in details:
             batch = []
             batches.append(batch)
             heading = detail.xpath('./dl/dt/b/text()').extract()[0]
             # Keep only the part after the first '.' of the heading text.
             title = heading.split('.')[1].strip()
             for row in detail.xpath('./div/table/tbody/tr'):
                 cells = row.xpath('./td')
                 source_service_id = cells[0].xpath('./input')[0].xpath('./@value').extract()[0]
                 second_value = cells[0].xpath('./input')[1].xpath('./@value').extract()[0]
                 batch.append(source_service_id + '_' + second_value)
                 service_name = cells[1].xpath('./text()').extract()[0].strip()

                 span_texts = cells[2].xpath('./span/text()').extract()
                 if span_texts:
                     # A <span> holds the service suggestion; the cell's own
                     # text is then stored as the car suggestion.
                     service_suggest = span_texts[0].strip()
                     car_suggest = cells[2].xpath('./text()').extract()[0].strip()
                 else:
                     cell_texts = cells[2].xpath('./text()').extract()
                     service_suggest = cell_texts[0].strip() if cell_texts else ''
                     car_suggest = None

                 service_id = self.dao.get_car_service_id(
                     self.source_id, source_service_id, title, service_name, service_suggest)
                 run_service_id = self.dao.get_car_run_service_id(
                     self.source_id,
                     response.meta['car_id'],
                     response.meta['firsttime'],
                     response.meta['mile'],
                     service_id,
                     service_type,
                     car_suggest)
                 for part in second_value.split('-'):
                     run_service_by_p_name[source_service_id + '_' + part] = run_service_id

     meta = response.meta
     meta['service_list'] = batches
     meta['next_service'] = 1
     meta['p_name_service_id_map'] = run_service_by_p_name
     timestamp = str(int(time.time()))
     first_batch_url = ''.join([
         self.add_service_url,
         'itemids=',
         ','.join(batches[0]),
         ',&_=',
         timestamp,
         '&jar=',
         str(meta['cookiejar']),
     ])
     return ants.Request(first_batch_url, meta=meta, callback='add_service')
Example #5
0
 def add_service(self, response):
     """Submit the next service batch, or move on to step 3 when none remain.

     ``response.meta['next_service']`` is the index into
     ``response.meta['service_list']`` of the batch to send. While it is in
     range, the index is advanced and a request for that batch is returned
     (handled by this method again). Once every batch has been sent, the
     step-3 page request is returned with callback ``parse_service_goods``.
     """
     meta = response.meta
     position = meta['next_service']
     if position < len(meta['service_list']):
         meta['next_service'] = position + 1
         timestamp = str(int(time.time()))
         batch_url = ''.join([
             self.add_service_url,
             'itemids=',
             ','.join(meta['service_list'][position]),
             ',&_=',
             timestamp,
             '&jar=',
             str(meta['cookiejar']),
         ])
         return ants.Request(batch_url, meta=meta, callback='add_service')

     # All batches submitted: continue with the step-3 page.
     step3_url = ''.join([
         'http://www.yangche51.com/baoyang/step3.html?curmileage=',
         str(meta['mile']),
         '&firsttime=',
         meta['firsttime'],
         '&jar=',
         str(meta['cookiejar']),
     ])
     return ants.Request(step3_url, meta=response.meta, callback='parse_service_goods')
Example #6
0
 def parse_brand(self, response):
     """Register each listed brand and schedule its ``parse_series`` request.

     Brand links come from the second child ``div`` of
     ``#choosecar1_div_choosecar``. Each request gets its own copy of
     ``response.meta`` extended with ``brand_id`` (the id returned by
     ``self.dao.get_car_id`` for level 1, parent 0). In debug mode only the
     first brand is followed.
     """
     columns = response.xpath('//div[@id="choosecar1_div_choosecar"]/div')
     for anchor in columns[1].xpath('./ul/li/a'):
         brand_name = anchor.xpath('./@title').extract()[0]
         choose_param = anchor.xpath('./@choosecarparam').extract()[0]
         cache_buster = str(int(time.time()))
         request_meta = dict(response.meta)
         request_meta['brand_id'] = self.dao.get_car_id(self.source_id, 1, 0, brand_name)
         target = self.url + '?' + 'json=' + str(choose_param) + '&_=' + cache_buster
         yield ants.Request(target, callback='parse_series', meta=request_meta)
         if self.debug:
             break
Example #7
0
 def parse_power(self, response):
     """Register each listed power option and schedule its ``parse_year`` request.

     Option links come from the second child ``div`` of
     ``#choosecar1_div_choosecar``. Each option is registered through
     ``self.dao.get_car_id`` (level 3, parent ``response.meta['series_id']``).

     Every request receives its own copy of the meta dict with ``power_id``
     set, so a later iteration cannot overwrite the id an earlier request
     carries (this matches how ``parse_brand`` builds its meta). In debug
     mode only the first option is followed.
     """
     content_div = response.xpath('//div[@id="choosecar1_div_choosecar"]/div')[1]
     base_meta = response.meta
     for power in content_div.xpath('./ul/li/a'):
         power_name = power.xpath('./@title').extract()[0]
         # Named choose_param rather than `json` to avoid hiding the stdlib
         # module name.
         choose_param = power.xpath('./@choosecarparam').extract()[0]
         timestamp = str(int(time.time()))
         car_id = self.dao.get_car_id(self.source_id, 3, base_meta['series_id'], power_name)
         # Copy per request instead of mutating the shared response.meta.
         meta = dict(base_meta)
         meta['power_id'] = car_id
         yield ants.Request(self.url + '?' + 'json=' + str(choose_param) + '&_=' + timestamp,
                            callback='parse_year',
                            meta=meta)
         if self.debug:
             break
Example #8
0
 def parse_series(self, response):
     """Register each listed series and schedule its ``parse_power`` request.

     Inside the second child ``div`` of ``#choosecar1_div_choosecar`` the
     i-th ``<b>`` element labels the i-th ``<ul>`` of series links. Each
     series is registered through ``self.dao.get_car_id`` (level 2, parent
     ``response.meta['brand_id']``) under the name ``"<label>:<series>"``.

     Every request receives its own copy of the meta dict with
     ``series_id`` set, so a later iteration cannot overwrite the id an
     earlier request carries (this matches how ``parse_brand`` builds its
     meta). In debug mode only the first series of the first group is
     followed.
     """
     content_div = response.xpath('//div[@id="choosecar1_div_choosecar"]/div')[1]
     nature_list = content_div.xpath('./b')
     base_meta = response.meta
     for i, ul in enumerate(content_div.xpath('./ul')):
         nature = nature_list[i].xpath('./text()').extract()[0].strip()
         for series in ul.xpath('./li/a'):
             series_name = series.xpath('./@title').extract()[0].strip()
             # Named choose_param rather than `json` to avoid hiding the
             # stdlib module name.
             choose_param = series.xpath('./@choosecarparam').extract()[0].strip()
             timestamp = str(int(time.time()))
             car_name = nature + ':' + series_name
             car_id = self.dao.get_car_id(self.source_id, 2, base_meta['brand_id'], car_name)
             # Copy per request instead of mutating the shared response.meta.
             meta = dict(base_meta)
             meta['series_id'] = car_id
             yield ants.Request(self.url + '?' + 'json=' + str(choose_param) + '&_=' + timestamp,
                                callback='parse_power',
                                meta=meta)
             if self.debug:
                 break
         if self.debug:
             break
Example #9
0
 def parse_service_goods(self, response):
     """Link the goods listed on the step-3 page to their run-service ids.

     Rows of ``div.tableBox`` are scanned from the third ``tr`` onward. A row
     is skipped when it has no ``td.pDo > i[@data]`` value or no ``pLink``
     anchor. The source goods id is the part of the href after the first
     ``self.front_len`` characters, up to the first ``.``.

     When the goods row is not yet in ``dw_crawl_goods`` a ``parse_goods``
     request for its page is yielded and the link is stored with a ``None``
     goods id; otherwise the stored id is used.
     """
     rows = response.xpath('//div[@class="tableBox"]/table/tr')
     meta = response.meta
     for row in rows[2:]:
         p_names = row.xpath('./td[@class="pDo"]/i/@data').extract()
         if not p_names:
             continue
         links = row.xpath('./td[contains(@class,"pLink")]/a/@href').extract()
         if not links:
             continue
         p_name = p_names[0]
         source_goods_id = links[0][self.front_len:].split('.')[0]
         # NOTE(review): the query is assembled by concatenating a value
         # scraped from the page; confirm whether the db helper supports
         # bound parameters and switch to them if so.
         sql = ''.join([
             'select id from dw_crawl_goods where source_id = ',
             str(self.source_id),
             ' and source_goods_id = "',
             source_goods_id,
             '"',
         ])
         matches = self.dao.db.get_data(sql)
         if matches:
             goods_id = matches[0]['id']
         else:
             goods_id = None
             yield ants.Request(self.front_url + source_goods_id + '.html', callback='parse_goods')
         service_id = meta['p_name_service_id_map'][p_name]
         self.dao.add_car_run_service_goods(self.source_id, service_id, source_goods_id, goods_id)
Example #10
0
 def parse(self, response):
     """Follow the "next page" (下一页) link of the pager, if there is one.

     Looks inside ``div#page`` for an anchor whose text contains "下一页"
     and yields a request for ``self.domain`` + its href, handled again by
     this method. When no such anchor exists nothing is yielded, which ends
     the pagination instead of raising ``IndexError``.
     """
     # A unicode literal gives the same value as decoding the UTF-8 bytes
     # and also works on interpreters where str has no .decode().
     next_links = response.xpath(
         u'//div[@id="page"]/a[contains(text(),"下一页")]/@href').extract()
     if not next_links:
         # No "next page" link (e.g. the last page): stop paginating.
         return
     yield ants.Request(self.domain + next_links[0], callback=self.parse)
Example #11
0
 def parse(self, response):
     """Yield the initial chooser request, handled by ``parse_words``.

     The query string carries the default ``json`` chooser parameters
     (``ChooseType`` 1, everything else zero/null) and a ``_`` timestamp
     parameter, written as ``_=<seconds>`` like every other request built in
     this spider.
     """
     param_list = [
         'json={"Alphabet":null,"AutoBrandId":0,"AutoModelId":0,"AutoModelSubId":0,"ChooseType":1,"MainAutoModelID":0,"SubIds":null,"Year":0}',
         # Was '_:' + timestamp, which is not a key=value query pair.
         '_=' + str(int(time.time())),
     ]
     yield ants.Request(self.url + '?' + '&'.join(param_list), callback='parse_words')
Example #12
0
 def go_to_input_page(self, response):
     """Yield the request for the ``/baoyang/`` entry page.

     Sets ``follow_cookiejar`` to True on ``response.meta`` and passes that
     dict along; a random number is placed in the query string and the
     response is handled by ``input_mile``.
     """
     carried_meta = response.meta
     carried_meta['follow_cookiejar'] = True
     entry_url = ''.join([self.domain, '/baoyang/?', str(random.random()), '#check'])
     yield ants.Request(entry_url, meta=carried_meta, callback='input_mile')