Example #1
0
def dump_shops():
    """Search every configured city for DUMP_KEYWORD and dump the results.

    For each name in DUMP_CITY_NAMES a ``City`` is created, a cookie is
    injected into both of its header sets, ``get()`` is called on it, and the
    search results are handed to ``add_results_to_output`` together with
    DUMP_FILE_SHOPS.

    The closing log line sits in ``finally``, so it is emitted even when one
    of the cities raises; the exception itself still propagates.
    """
    try:
        for city_name in DUMP_CITY_NAMES:
            current_city = City(city_name)
            # Both header sets get a cookie before the first request is made.
            for header_attr in ('headers', 'map_headers'):
                inject_cookie(getattr(current_city, header_attr))
            current_city.get()
            add_results_to_output(current_city.search(DUMP_KEYWORD),
                                  DUMP_FILE_SHOPS)
    finally:
        logger.info(f'Data dumped to {DUMP_FILE_SHOPS}')
Example #2
0
 def to_dict(self):
     """Build the Place-specific fields and delegate to the parent's to_dict.

     The related city and owner rows are looked up with ``City.get`` and
     ``User.get`` and only their ids are stored in the result.
     """
     city_row = City.get(City.id == self.city)
     owner_row = User.get(User.id == self.owner)
     place_fields = {
         'owner_id': owner_row.id,
         'city_id': city_row.id,
         'name': self.name,
         'description': self.description,
         'number_rooms': self.number_rooms,
         'number_bathrooms': self.number_bathrooms,
         'max_guest': self.max_guest,
         'price_by_night': self.price_by_night,
         'latitude': self.latitude,
         'longitude': self.longitude,
     }
     # NOTE(review): `self` is passed explicitly in addition to the bound
     # super() call; presumably the parent signature is
     # to_dict(self, obj, data) -- confirm against the base class.
     return super(Place, self).to_dict(self, place_fields)
Example #3
0
	def to_dict(self):
		"""Serialize this record into a plain dictionary.

		The owner and city rows are fetched with ``User.get`` / ``City.get``
		and contribute only their ids. Both timestamps are rendered as
		``YYYY/MM/DD HH:MM:SS`` strings.
		"""
		stamp_format = '%Y/%m/%d %H:%M:%S'
		owner_row = User.get(User.id == self.owner)
		city_row = City.get(City.id == self.city)

		serialized = {}
		serialized['id'] = self.id
		serialized['created_at'] = self.created_at.strftime(stamp_format)
		serialized['updated_at'] = self.updated_at.strftime(stamp_format)
		serialized['owner_id'] = owner_row.id
		serialized['city_id'] = city_row.id
		serialized['name'] = self.name
		serialized['description'] = self.description
		serialized['number_rooms'] = self.number_rooms
		serialized['number_bathrooms'] = self.number_bathrooms
		serialized['max_guest'] = self.max_guest
		serialized['price_by_night'] = self.price_by_night
		serialized['latitude'] = self.latitude
		serialized['longitude'] = self.longitude
		return serialized
Example #4
0
    def to_dict(self):
        """Return a dictionary representation of this record.

        ``owner_id`` and ``city_id`` come from rows fetched with
        ``User.get`` / ``City.get``; ``created_at`` and ``updated_at`` are
        formatted as ``YYYY/MM/DD HH:MM:SS`` strings. All remaining keys are
        copied from the attribute of the same name.
        """
        time_format = '%Y/%m/%d %H:%M:%S'
        owner_record = User.get(User.id == self.owner)
        city_record = City.get(City.id == self.city)

        payload = {
            'id': self.id,
            'created_at': self.created_at.strftime(time_format),
            'updated_at': self.updated_at.strftime(time_format),
            'owner_id': owner_record.id,
            'city_id': city_record.id,
        }
        # Plain columns are copied verbatim, in the original key order.
        plain_columns = (
            'name',
            'description',
            'number_rooms',
            'number_bathrooms',
            'max_guest',
            'price_by_night',
            'latitude',
            'longitude',
        )
        for column in plain_columns:
            payload[column] = getattr(self, column)
        return payload
Example #5
0
from city import City
from dbhelper import Database
from config import MongoDB

if __name__ == '__main__':
    # Demo run: one keyword search in a single city, backed by MongoDB.
    database = Database(MongoDB)
    # '南宁' is Nanning, so the variable is named after the city it holds.
    nanning = City('南宁', searchDB=database)
    nanning.get()
    results = nanning.search(
        keyword='朝阳广场地铁站',
        category='美食',
        save=True,
        details=False,
    )
Example #6
0
class CitySpider:
    """Crawl shop information and shop comments for one city.

    During construction ``city.get()`` is called and the city's category
    tree is converted into three views:

    * ``category_list`` -- nested list mirroring the category tree,
    * ``coa_category`` -- names of the top-level (coarse) categories,
    * ``fin_category`` -- names of the leaf (fine-grained) categories.
    """

    def __init__(self, cityName, area=None):
        """Create the underlying ``City`` and pre-compute category views.

        :param cityName: name of the city, passed straight to ``City``
        :param area: stored on the instance as ``self.area``; not used by
            any method of this class
        """
        self.cityName = cityName
        self.area = area
        self.city = City(cityName,
                         searchDB=Database(MongoDB),
                         commentsDB=Database(MongoDB))
        self.city.get()
        self.category_list = []
        self.coa_category = []
        self.fin_category = []
        self.process_category(self.city.category, self.category_list)
        self.coarsness_category(self.category_list)
        self.fine_grained_category(self.category_list)

    def get_area(self, save=False):
        """Return the names of all districts of the city.

        Entries of ``self.city.locations`` whose text is the "all districts"
        placeholder are skipped. Entries that cannot be read are logged at
        debug level and skipped as well.

        :param save: when true, also store the district list through
            ``init_area_db``
        :return: list of district names
        """
        area_list = []
        for item in self.city.locations:
            try:
                text = item['text']
            except (KeyError, TypeError):
                # Malformed entry (missing key or not subscriptable): keep
                # the best-effort behaviour, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare ``except`` would.
                logger.debug(f'获取城市分区失败:[城市:{self.cityName}]')
                continue
            if text == '全部地区':
                continue
            area_list.append(text)
        logger.info(f'获取 “{self.cityName}” 所有区成功.')
        if save:
            areaDB = init_area_db(Database(MongoDB))
            areaDB.save({'area': area_list}, self.cityName)
            logger.info(f'已将 “{self.cityName}” 所有区信息保存到数据库中.')
        return area_list

    def process_category(self, obj, category_list):
        """Convert a category tree into a nested list, in place.

        A node without ``'children'`` is appended as its ``'text'`` string;
        a node with ``'children'`` is appended as ``{text: [...]}`` where the
        inner list is filled recursively.

        :param obj: iterable of category nodes
        :param category_list: list that receives the converted entries
        :return: the same ``category_list`` object
        """
        for item in obj:
            if 'children' in item:
                children = []
                category_list.append({item['text']: children})
                self.process_category(item['children'], children)
            else:
                category_list.append(item['text'])
        return category_list

    def coarsness_category(self, category_list):
        """Append the top-level category names to ``self.coa_category``.

        Strings are taken as-is; for a dict entry its first key is used.
        """
        for item in category_list:
            if isinstance(item, str):
                self.coa_category.append(item)
            elif isinstance(item, dict):
                self.coa_category.append(list(item)[0])

    def fine_grained_category(self, category_list):
        """Append every leaf category name to ``self.fin_category``.

        Dict entries are descended into recursively through the value of
        their first key.
        """
        for item in category_list:
            if isinstance(item, str):
                self.fin_category.append(item)
            elif isinstance(item, dict):
                self.fine_grained_category(item[list(item)[0]])

    def get_category(self, save=False):
        """Return the city's shop categories.

        :param save: when true, store the processed ``self.category_list``
            through ``init_category_db``
        :return: the raw ``self.city.category`` (not the processed list)
        """
        logger.info(f'获取 “{self.cityName}” 所有店铺分类成功.')
        if save:
            categoryDB = init_category_db(Database(MongoDB))
            categoryDB.save({'category': self.category_list},
                            tname=self.cityName)
            logger.info(f'已将 “{self.cityName}” 所有店铺分类信息保存到数据库中.')
        return self.city.category

    def save_shop_info(self):
        """Search every (district, leaf category) pair with ``save=True``.

        The "all categories" placeholder is skipped. Each search is issued
        through ``city.async_search`` with an empty keyword.

        :return: always ``True``
        """
        for area in self.get_area():
            for category in self.fin_category:
                if category == '全部分类':
                    continue
                self.city.async_search('',
                                       category=category,
                                       location=area,
                                       filter=None,
                                       sort='按人气排序',
                                       save=True,
                                       details=False,
                                       comments=False)
        return True

    def save_shop_comments(self):
        """Fetch comments for all districts and leaf categories.

        Delegates to ``city.get_comments`` with the district list and
        ``self.fin_category``.

        :return: always ``True``
        """
        self.city.get_comments(self.get_area(), self.fin_category)
        return True
Example #7
0
 '山西': {'areaId': 1, 'provinceId': '4'}, '内蒙古': {'areaId': 1, 'provinceId': '5'},
 '辽宁': {'areaId': 2, 'provinceId': '6'}, '吉林': {'areaId': 2, 'provinceId': '7'},
 '黑龙江': {'areaId': 2, 'provinceId': '8'}, '上海': {'areaId': 3, 'provinceId': '9'},
 '江苏': {'areaId': 3, 'provinceId': '10'}, '浙江': {'areaId': 3, 'provinceId': '11'},
 '安徽': {'areaId': 3, 'provinceId': '12'}, '福建': {'areaId': 3, 'provinceId': '13'},
 '江西': {'areaId': 3, 'provinceId': '14'}, '山东': {'areaId': 3, 'provinceId': '15'},
 '河南': {'areaId': 4, 'provinceId': '16'}, '湖北': {'areaId': 4, 'provinceId': '17'},
 '湖南': {'areaId': 4, 'provinceId': '18'}, '广东': {'areaId': 4, 'provinceId': '19'},
 '广西': {'areaId': 4, 'provinceId': '20'}, '海南': {'areaId': 4, 'provinceId': '21'},
 '重庆': {'areaId': 5, 'provinceId': '22'}, '四川': {'areaId': 5, 'provinceId': '23'},
 '贵州': {'areaId': 5, 'provinceId': '24'}, '云南': {'areaId': 5, 'provinceId': '25'},
 '西藏': {'areaId': 5, 'provinceId': '26'}, '陕西': {'areaId': 6, 'provinceId': '27'},
 '甘肃': {'areaId': 6, 'provinceId': '28'}, '青海': {'areaId': 6, 'provinceId': '29'},
 '宁夏': {'areaId': 6, 'provinceId': '30'}, '新疆': {'areaId': 6, 'provinceId': '31'},
 '香港': {'areaId': 7, 'provinceId': '32'}, '澳门': {'areaId': 7, 'provinceId': '33'},
 '台湾': {'areaId': 7, 'provinceId': '34'}}
"""

# Demo: search coffee shops in one district of the city '上海'.
sh = City('上海')
# url = sh.url

sh.get()
# locations = sh.locations

results = sh.search(
    '咖啡',
    category='咖啡厅',
    location='嘉定区',
    save=False,
    details=True,
)
print(results)