Beispiel #1
0
    def parse_list(self, response):
        """Turn a station-list JSON response into Baidu geocoding requests.

        ``response.body`` must be JSON containing a ``stationList`` array and
        ``response.meta['data']`` must provide ``city_name`` and
        ``prov_name``.  For every station a ``BusStatoinItem`` is populated
        and one geocoder request for the station name is yielded; the item
        and the API key that was used travel to ``parse_addr`` via ``meta``.
        """
        data = json.loads(response.body)
        m = response.meta['data']
        # Dict views cannot be indexed on Python 3, so snapshot the API keys
        # into a list once and draw from it with random.choice().
        api_keys = list(self.baidu_key.values())
        for entry in data['stationList']:
            item = BusStatoinItem()
            item['station_id'] = entry['PK_STATION_ID']
            item['city'] = m['city_name']
            item['prov'] = m['prov_name']
            item['name'] = entry['STATION_NAME']
            # Address and telephone are only copied when the entry has them.
            if 'STATION_ADDRESS' in entry:
                item['addr'] = entry['STATION_ADDRESS']
            if 'TELPHONE' in entry:
                item['tel'] = entry['TELPHONE']

            key = random.choice(api_keys)
            url = u'http://api.map.baidu.com/geocoder/v2/?ak=%s&output=json&address=%s' % (
                key, item['name'])
            # Add the city parameter only when the value returned by
            # utils.get_short_loc() for the city is not part of the name.
            if utils.get_short_loc(item['city']) not in item['name']:
                url += u'&city=%s' % item['city']
            yield Request(url=url,
                          callback=self.parse_addr,
                          meta={
                              'item': item,
                              'key': key
                          })
Beispiel #2
0
    def parse_addr(self, response):
        """Attach Baidu geocoder coordinates to the item in ``response.meta``.

        When the JSON body contains ``result.location`` its ``lat``/``lng``
        are stored on the item as ``blat``/``blng`` and the item is yielded.
        When a key is missing (``KeyError``) the item's ``addr`` is geocoded
        instead, with ``stop_flag`` set in ``meta`` so that this fallback
        happens at most once.  If the fallback already ran, or the item has
        no non-empty ``addr``, the item is yielded without coordinates.
        """
        item = response.meta['item']
        try:
            result = json.loads(response.body)['result']
            loc = result['location']
            lat, lng = loc['lat'], loc['lng']

            item['blat'] = lat
            item['blng'] = lng
            yield item

        except KeyError:
            if 'stop_flag' in response.meta or 'addr' not in item or not item[
                    'addr']:
                yield item
            else:
                # random.choice() over a list snapshot replaces indexing
                # dict.values(), which is not subscriptable on Python 3.
                key = random.choice(list(self.baidu_key.values()))
                url = u'http://api.map.baidu.com/geocoder/v2/?ak=%s&output=json&address=%s' % (
                    key, item['addr'])
                # Add the city parameter only when the value returned by
                # utils.get_short_loc() for the city is not in the address.
                if utils.get_short_loc(item['city']) not in item['addr']:
                    url += u'&city=%s' % item['city']
                yield Request(url=url,
                              callback=self.parse_addr,
                              meta={
                                  'item': item,
                                  'key': key,
                                  'stop_flag': True
                              })
Beispiel #3
0
    def process_item(self, item, spider):
        """Merge a Qyer city item into the ``geo.Locality`` collection.

        The country document for ``item['country_code']`` is looked up (and
        cached in ``QyerCityProcPipeline.country_map``).  An existing
        locality is matched first by ``source.qyer.id`` and otherwise by
        lower-cased Chinese name, proximity and country; when neither
        matches, a new document is started.  The document is then updated
        from the item and saved.

        Returns the unchanged ``item``.  Raises ``AssertionError`` when no
        country document exists for the item's country code.
        """
        col_loc = get_mongodb('geo', 'Locality', profile='mongodb-general')

        # get country
        country_code = item['country_code']
        if country_code not in QyerCityProcPipeline.country_map:
            col_country = get_mongodb('geo', 'Country', profile='mongodb-general')
            country = col_country.find_one({'code': country_code})
            assert country is not None, 'no Country document for code %r' % (country_code,)
            QyerCityProcPipeline.country_map[country_code] = country
        else:
            country = QyerCityProcPipeline.country_map[country_code]

        city_id = item['city_id']
        city = col_loc.find_one({'source.qyer.id': city_id})

        if not city:
            # Fall back to an alias + $near lookup within the same country.
            city = col_loc.find_one({'alias': item['zh_name'].lower(),
                                     'location': {
                                         '$near': {'type': 'Point', 'coordinates': [item['lng'], item['lat']]}},
                                     'country._id': country['_id']})
            if city:
                dist = utils.haversine(city['location']['coordinates'][0], city['location']['coordinates'][1],
                                       item['lng'], item['lat'])
                # Reject a same-named match that is too far away.
                # NOTE(review): threshold is presumably in km; confirm
                # against utils.haversine.
                if dist > 100:
                    city = {}

        if not city:
            city = {}

        city['enName'] = item['en_name']
        zh_name = item['zh_name']
        short_name = utils.get_short_loc(zh_name)
        city['zhName'] = short_name

        # Work on a copy so the alias list of the fetched document is not
        # mutated in place while the merged list is being built.
        aliases = list(city.get('alias') or [])
        aliases.extend(item['alias'] if 'alias' in item and item['alias'] else [])
        aliases.append(short_name)
        # Normalise, drop empty strings and de-duplicate.
        city['alias'] = list({val for val in (tmp.lower().strip() for tmp in aliases) if val})

        source = city.get('source', {})
        source['qyer'] = {'id': item['city_id'], 'url': item['url']}
        city['source'] = source
        city['country'] = {'id': country['_id'], '_id': country['_id']}
        for k in ('enName', 'zhName'):
            if k in country:
                city['country'][k] = country[k]

        city['level'] = 2
        city['desc'] = item['desc']
        city['imageList'] = item['imageList']
        city['images'] = []
        city['location'] = {'type': 'Point', 'coordinates': [item['lng'], item['lat']]}
        city['abroad'] = country_code != 'CN'
        city['isHot'] = item['is_hot'] > 0

        # NOTE(review): Collection.save() is a legacy PyMongo API
        # (deprecated in 3.x, removed in 4.0); confirm the driver version
        # before upgrading.
        col_loc.save(city)

        return item
Beispiel #4
0
    def parse_list(self, response):
        """Build one Baidu geocoding request per station in the response.

        Reads the ``stationList`` array from the JSON body and the
        ``city_name``/``prov_name`` values from ``response.meta['data']``,
        fills a ``BusStatoinItem`` for each station and yields a geocoder
        ``Request`` for the station name.  The item and the chosen API key
        are handed to ``parse_addr`` through ``meta``.
        """
        data = json.loads(response.body)
        m = response.meta['data']
        # list(...) + random.choice() replaces indexing dict.values(), which
        # fails on Python 3; the snapshot is taken once, outside the loop.
        api_keys = list(self.baidu_key.values())
        for entry in data['stationList']:
            item = BusStatoinItem()
            item['station_id'] = entry['PK_STATION_ID']
            item['city'] = m['city_name']
            item['prov'] = m['prov_name']
            item['name'] = entry['STATION_NAME']
            # Optional fields: copied only when present in the entry.
            if 'STATION_ADDRESS' in entry:
                item['addr'] = entry['STATION_ADDRESS']
            if 'TELPHONE' in entry:
                item['tel'] = entry['TELPHONE']

            key = random.choice(api_keys)
            url = u'http://api.map.baidu.com/geocoder/v2/?ak=%s&output=json&address=%s' % (key, item['name'])
            # The city parameter is appended unless the result of
            # utils.get_short_loc(city) already occurs in the station name.
            if utils.get_short_loc(item['city']) not in item['name']:
                url += u'&city=%s' % item['city']
            yield Request(url=url, callback=self.parse_addr, meta={'item': item, 'key': key})
Beispiel #5
0
    def parse_addr(self, response):
        """Store geocoder coordinates on the item, retrying once by address.

        On a body containing ``result.location`` the ``lat``/``lng`` values
        are written to ``item['blat']``/``item['blng']`` and the item is
        yielded.  On ``KeyError`` a second geocoder request is issued for
        ``item['addr']`` with ``stop_flag`` set in ``meta``; if that flag is
        already present, or the item has no non-empty ``addr``, the item is
        yielded as it is.
        """
        item = response.meta['item']
        try:
            result = json.loads(response.body)['result']
            loc = result['location']
            lat, lng = loc['lat'], loc['lng']

            item['blat'] = lat
            item['blng'] = lng
            yield item

        except KeyError:
            if 'stop_flag' in response.meta or 'addr' not in item or not item['addr']:
                yield item
            else:
                # dict.values() is a non-indexable view on Python 3, so pick
                # the key with random.choice() from a list snapshot.
                key = random.choice(list(self.baidu_key.values()))
                url = u'http://api.map.baidu.com/geocoder/v2/?ak=%s&output=json&address=%s' % (key, item['addr'])
                # The city parameter is appended unless the result of
                # utils.get_short_loc(city) already occurs in the address.
                if utils.get_short_loc(item['city']) not in item['addr']:
                    url += u'&city=%s' % item['city']
                yield Request(url=url, callback=self.parse_addr, meta={'item': item, 'key': key, 'stop_flag': True})
Beispiel #6
0
    def process_item(self, item, spider):
        """Upsert the locality document that corresponds to a Qyer city item.

        Steps: resolve the country document for ``item['country_code']``
        (cached in ``QyerCityProcPipeline.country_map``); find an existing
        locality by ``source.qyer.id`` or, failing that, by lower-cased
        Chinese name near the item's coordinates within the same country;
        start a new document if none is accepted; copy the item's fields
        onto the document and save it.

        Returns the unchanged ``item``.  Raises ``AssertionError`` when the
        country code has no matching country document.
        """
        col_loc = get_mongodb('geo', 'Locality', profile='mongodb-general')

        # get country
        country_code = item['country_code']
        if country_code not in QyerCityProcPipeline.country_map:
            col_country = get_mongodb('geo',
                                      'Country',
                                      profile='mongodb-general')
            country = col_country.find_one({'code': country_code})
            assert country is not None, (
                'no Country document for code %r' % (country_code,))
            QyerCityProcPipeline.country_map[country_code] = country
        else:
            country = QyerCityProcPipeline.country_map[country_code]

        city_id = item['city_id']
        city = col_loc.find_one({'source.qyer.id': city_id})

        if not city:
            # Second attempt: same alias, near the item's point, same country.
            city = col_loc.find_one({
                'alias': item['zh_name'].lower(),
                'location': {
                    '$near': {
                        'type': 'Point',
                        'coordinates': [item['lng'], item['lat']]
                    }
                },
                'country._id': country['_id']
            })
            if city:
                dist = utils.haversine(city['location']['coordinates'][0],
                                       city['location']['coordinates'][1],
                                       item['lng'], item['lat'])
                # Discard a name match that lies too far from the item.
                # NOTE(review): unit of the threshold is presumably km;
                # confirm against utils.haversine.
                if dist > 100:
                    city = {}

        if not city:
            city = {}

        city['enName'] = item['en_name']
        zh_name = item['zh_name']
        short_name = utils.get_short_loc(zh_name)
        city['zhName'] = short_name

        # Copy the stored aliases first: extending the fetched document's
        # own list in place would mutate it before the merge is complete.
        aliases = list(city.get('alias') or [])
        if 'alias' in item and item['alias']:
            aliases.extend(item['alias'])
        aliases.append(short_name)
        # Lower-case and strip each alias, then drop blanks and duplicates.
        normalised = (tmp.lower().strip() for tmp in aliases)
        city['alias'] = list({val for val in normalised if val})

        source = city.get('source', {})
        source['qyer'] = {'id': item['city_id'], 'url': item['url']}
        city['source'] = source
        city['country'] = {'id': country['_id'], '_id': country['_id']}
        for k in ('enName', 'zhName'):
            if k in country:
                city['country'][k] = country[k]

        city['level'] = 2
        city['desc'] = item['desc']
        city['imageList'] = item['imageList']
        city['images'] = []
        city['location'] = {
            'type': 'Point',
            'coordinates': [item['lng'], item['lat']]
        }
        city['abroad'] = country_code != 'CN'
        city['isHot'] = item['is_hot'] > 0

        # NOTE(review): Collection.save() is a legacy PyMongo API
        # (deprecated in 3.x, removed in 4.0); confirm the driver version
        # before upgrading.
        col_loc.save(city)

        return item