def count_top10(): session = Session() rows = session.query( HouseModel.xiaoqu, func.count(HouseModel.xiaoqu).label('count') ).group_by(HouseModel.xiaoqu).order_by('count desc').limit(10) return [row._asdict() for row in rows]
def hot_res(): session = Session() rows = session.query( FoodModel.name, ((FoodModel.kouwei + FoodModel.huanjin + FoodModel.fuwu) * FoodModel.reviewNum).label('sum')).order_by('sum desc').limit(10) return [row._asdict() for row in rows]
def education_stat(): session = Session() rows = session.query(JobModel.education, func.count( JobModel.education).label('count')).group_by( 'education').order_by('count desc') return [row._asdict() for row in rows]
def count_top10_lei(): """商圈前十的分类 """ session = Session() rows = session.query(FoodModel.fenlei, func.count(FoodModel.fenlei).label('count')).group_by( FoodModel.fenlei).order_by('count desc').limit(10) return [row._asdict() for row in rows]
def exp_top10_lei(): """人均消费前十的分类 """ session = Session() rows = session.query( FoodModel.fenlei, func.avg(FoodModel.agvExp).cast(Float).label('agvExp')).group_by( FoodModel.fenlei).order_by('agvExp desc').limit(10) return [row._asdict() for row in rows]
def experience_stat(): session = Session() rows = session.query( func.concat(JobModel.experience_lower, '-', JobModel.experience_upper).label('experience'), func.count('experience').label('count')).group_by( 'experience').order_by('count desc') return [row._asdict() for row in rows]
def count_top10(): """职位数排名前十的城市 """ session = Session() rows = session.query( JobModel.city, func.count(JobModel.city).label('count') ).group_by(JobModel.city).order_by('count desc').limit(10) return [row._asdict() for row in rows]
def salary_top10(): session = Session() rows = session.query( JobModel.city, func.avg((JobModel.salary_lower + JobModel.salary_upper) / 2).cast(Float).label('salary')).filter( and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0)).group_by( JobModel.city).order_by('salary desc').limit(10) return [row._asdict() for row in rows]
def count_top10_lei_1(): """商圈前十的分类 """ session = Session() rows = session.query(FoodModel.fenlei, func.count(FoodModel.fenlei).label('count')).group_by( FoodModel.fenlei).order_by('count desc').limit(10) res = [] for row in rows: res.append(row.fenlei) return res
def education_stat(): """学历要求统计 """ session = Session() rows = session.query( JobModel.education, func.count(JobModel.education).label('count') ).group_by('education').order_by(desc('count')) 学历要求统计 = [row._asdict() for row in rows] return 学历要求统计
def count_top10_quan(): """商圈前十的商圈 rows = session.query( JobModel.city, func.count(JobModel.city).label('count') ).group_by(JobModel.city).order_by('count desc').limit(10) """ session = Session() rows = session.query(FoodModel.quan, func.count(FoodModel.quan).label('count')).group_by( FoodModel.quan).order_by('count desc').limit(10) return [row._asdict() for row in rows]
def salary_by_city_and_education(): session = Session() rows = session.query( JobModel.city, JobModel.education, func.avg( (JobModel.salary_lower + JobModel.salary_upper) / 2).cast(Float).label('salary')).filter( and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0)).group_by( JobModel.city, JobModel.education).order_by(JobModel.city.desc()) return [row._asdict() for row in rows]
def fenlei_and_quan(): """热门分类在热门商圈的人均消费对比 """ session = Session() rows = session.query( FoodModel.fenlei, FoodModel.quan, func.avg(FoodModel.agvExp).cast(Float).label('agvExp')).filter( and_(FoodModel.fenlei.in_(count_top10_lei_1()), FoodModel.quan.in_(count_top10_quan_1()))).group_by( FoodModel.fenlei, FoodModel.quan).order_by(FoodModel.quan.desc()) return [row._asdict() for row in rows]
def all_huxing(): session = Session() rows = session.query( (HouseModel.huxing).label('item'), func.count(HouseModel.huxing).label('count'), func.count(HouseModel.huxing).label('percent') ).group_by(HouseModel.huxing).order_by('count desc') result = [row._asdict() for row in rows] total = 0 for row in result: total += int(row['count']) for row in result: row['percent'] = round(row['count']/total,2) results = {'total':total,'data':result} return results
def count_top10_quan_1(): """商圈前十的商圈 rows = session.query( JobModel.city, func.count(JobModel.city).label('count') ).group_by(JobModel.city).order_by('count desc').limit(10) """ session = Session() rows = session.query(FoodModel.quan, func.count(FoodModel.quan).label('count')).group_by( FoodModel.quan).order_by('count desc').limit(10) res = [] for row in rows: res.append(row.quan) return res
def salary_top10(): """薪资排名前十的城市 """ session = Session() rows = session.query( JobModel.city, func.avg( (JobModel.salary_lower + JobModel.salary_upper) / 2 ).label('salary') ).filter( and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0) ).group_by(JobModel.city).order_by(desc('salary')).limit(10) 薪资排名列表 = [row._asdict() for row in rows] # 列表中每个元素是字典,字典的 salary 值的数据类型是 Decimal # 须将其转换为 float 类型 for d in 薪资排名列表: d['salary'] = float(format(d['salary'], '.2f')) return 薪资排名列表
def salary_by_city_and_education(): """同等学历不同城市薪资对比 """ session = Session() rows = session.query( JobModel.city, JobModel.education, func.avg( (JobModel.salary_lower + JobModel.salary_upper) / 2 ).cast(Float).label('salary') ).filter( and_(JobModel.salary_lower > 0, JobModel.salary_upper > 0) ).group_by(JobModel.city, JobModel.education).order_by(JobModel.city.desc()) #return [row._asdict() for row in rows] 同等学历不同城市薪资对比 = [row._asdict() for row in rows] for i in 同等学历不同城市薪资对比: i['salary'] = float(format(i['salary'],'.2f')) return 同等学历不同城市薪资对比
class PersistentPipeline(object): """持久化数据 Pipeline """ def open_spider(self, spider): self.session = Session() def close_spider(self, spider): self.session.commit() self.session.close() def process_item(self, item, spider): if isinstance(item, JobItem): return self._process_job_item(item) else: return item def _process_job_item(self, item): city = item['city'].split('·')[0] salary_lower, salary_upper = 0, 0 m = re.match(r'[^\d]*(\d+)k-(\d+)k', item['salary']) if m is not None: salary_lower, salary_upper = int(m.group(1)), int(m.group(2)) experience_lower, experience_upper = 0, 0 m = re.match(r'[^\d]*(\d+)-(\d+)', item['experience']) if m is not None: experience_lower, experience_upper = int(m.group(1)), int( m.group(2)) tags = ' '.join(item['tags']) model = JobModel( title=item['title'], city=city, salary_lower=salary_lower, salary_upper=salary_upper, experience_lower=experience_lower, experience_upper=experience_upper, education=item['education'], tags=tags, company=item['company'], ) self.session.add(model) return item
class SeiyaPipeline(object): def open_spider(self, spider): self.session = Session() def close_spider(self, spider): self.session.commit() self.session.close() def process_item(self, item, spider): if isinstance(item, JobItem): return self._process_job_item(item) else: return item def _process_job_item(self, item): city = item['city'].split('·')[0] m = re.search(r'(\d*)k-(\d*)k', item['salary']) if m: salary_lower, salary_upper = int(m.group(1)), int(m.group(2)) else: salary_lower, salary_upper = 0, 0 m = re.search(r'(\d+)-(\d+)', item['experience']) if m: experience_lower, experience_upper = int(m.group(1)), int( m.group(2)) else: experience_lower, experience_upper = 0, 0 tags = ' '.join(item['tags']) jobdata = JobModel(title=item['title'], city=city, salary_lower=salary_lower, salary_upper=salary_upper, experience_lower=experience_lower, experience_upper=experience_upper, education=item['education'], tags=tags, company=item['company']) self.session.add(jobdata) return item
def open_spider(self, spider): #self.session = sessionmaker(bind=engine)() self.session = Session()
def all_area(): session = Session() rows = session.query(HouseModel.mianji) return [float(row._asdict()['mianji']) for row in rows]
def open_spider(self, spider): self.session = Session()
class PersistentPipeline(object): """持久化数据 Pipeline """ def open_spider(self, spider): self.session = Session() def close_spider(self, spider): self.session.commit() self.session.close() def process_item(self, item, spider): if isinstance(item, JobItem): return self._process_job_item(item) elif isinstance(item, FoodItem): return self._process_food_item(item) elif isinstance(item, HouseItem): return self._process_house_item(item) else: return item def _process_job_item(self, item): city = item['city'].split('·')[0] salary_lower, salary_upper = 0, 0 m = re.match(r'[^\d]*(\d+)k-(\d+)k', item['salary']) if m is not None: salary_lower, salary_upper = int(m.group(1)), int(m.group(2)) experience_lower, experience_upper = 0, 0 m = re.match(r'[^\d]*(\d+)-(\d+)', item['experience']) if m is not None: experience_lower, experience_upper = int(m.group(1)), int( m.group(2)) tags = ' '.join(item['tags']) model = JobModel( title=item['title'], city=city, salary_lower=salary_lower, salary_upper=salary_upper, experience_lower=experience_lower, experience_upper=experience_upper, education=item['education'], tags=tags, company=item['company'], ) self.session.add(model) return item def _process_food_item(self, item): agvExp, score = 0, 0 m = re.match(r'¥(\d+)', item['agvExp']) if m is not None: agvExp = int(m.group(1)) m = re.match(r'(.*)(\d{2})', item['score']) if m is not None: score = int(m.group(2)) / 10.0 model = FoodModel( label=item['label'], name=item['name'], score=score, reviewNum=int(item['reviewNum']), agvExp=agvExp, fenlei=item['fenlei'], quan=item['quan'], addr=item['addr'], kouwei=float(item['kouwei']), huanjin=float(item['huanjin']), fuwu=float(item['fuwu']), ) self.session.add(model) return item def _process_house_item(self, item): mianji = item['mianji'].split('平米')[0] louceng = item['other'][0] years = item['other'][1] model = HouseModel( area=item['area'], name=item['name'], xiaoqu=item['xiaoqu'].replace("\xa0\xa0", ""), huxing=item['huxing'].replace("\xa0\xa0", ""), mianji=float(mianji), chaoxiang=item['chaoxiang'], quan=item['quan'], louceng=louceng, years=years, labels=' '.join(item['labels']), price=int(item['price']), ) self.session.add(model) return item