Ejemplo n.º 1
0
 def test_get_finance_stage_statistic(self):
     """Check get_finance_stage_statistic for jobs of companies 1, 2 and 3.

     The expected labels depend on company data that is not visible in this
     test (presumably fixtures mapping each company to a finance stage).
     """
     jobs = [JobModel(company_id=company_id) for company_id in (1, 2, 3)]
     statistic = keyword_statistic_ctl.get_finance_stage_statistic(jobs)
     expected_statistic = {'未融资': 2, 'A轮': 1}
     self.assertDictEqual(statistic, expected_statistic)
Ejemplo n.º 2
0
def crawl_lagou_job_data_task(lagou_company_id):
    """Crawl the technology jobs of one Lagou company and persist them.

    Each crawled job is cleaned and converted, then either updates the
    existing ``JobModel`` row with the same ``lagou_job_id`` or is inserted
    as a new row. Keywords are only linked for newly inserted jobs.
    Paging stops at the first page that yields no jobs.

    :param lagou_company_id: Lagou identifier of the company to crawl.
    :return: ``None``; returns immediately when the company was already
        marked as crawled.
    """
    # Skip companies whose jobs were already crawled in this round: the
    # marker key is created with setnx, and a falsy result means the key
    # was already there.
    crawled_key = constants.CRAWLED_COMPANY_JOBS_REDIS_KEY.format(lagou_company_id=lagou_company_id)
    if not redis_instance.setnx(crawled_key, 1):
        return

    jobs_pagination = crawlers.get_jobs_pagination_from_lagou(lagou_company_id=lagou_company_id,
                                                              job_type=constants.LagouJobType.technology)
    # Every job of this crawl belongs to the same company, so the company row
    # is looked up once (lazily, when the first job is seen) rather than once
    # per job.
    company_id = None
    for page_no in jobs_pagination.iter_pages:
        job_dicts = crawlers.get_jobs_from_lagou(lagou_company_id=lagou_company_id,
                                                 job_type=constants.LagouJobType.technology,
                                                 page_no=page_no)
        if not job_dicts:
            break
        for job_dict in job_dicts:
            crawlers.clean_lagou_job_data(job_dict)
            utils.convert.convert_dict_field_to_constants(job_dict)

            # These two entries are not passed on to JobModel; the city name
            # is replaced by its id below.
            keywords = job_dict.pop('keywords')
            city_name = job_dict.pop('city_name')

            city_ctl.insert_city_if_not_exist(city_name)
            job_dict['city_id'] = city_ctl.get_city_id_by_name(city_name)
            if company_id is None:
                company = CompanyModel.get_one(filter_by={'lagou_company_id': lagou_company_id})
                company_id = company.id
            job_dict['company_id'] = company_id

            # NOTE(review): attribute access assumes job_dict is an
            # attribute-style dict rather than a plain dict - confirm.
            job = JobModel.get_one(filter_by={'lagou_job_id': job_dict.lagou_job_id})
            if job:
                JobModel.update_by_pk(pk=job.id, values=job_dict)
                continue

            job_id = JobModel.add(**job_dict)
            for keyword in keywords:
                keyword_ctl.insert_keyword_if_not_exist(name=keyword)
                keyword_id = keyword_ctl.get_keyword_id_by_name(name=keyword)
                JobKeywordModel.add(keyword_id=keyword_id, job_id=job_id)
Ejemplo n.º 3
0
 def test_get_work_years_statistic(self):
     """Check get_work_years_statistic: two '应届毕业生' jobs and one '1-3年' job."""
     labels = ('应届毕业生', '应届毕业生', '1-3年')
     jobs = [JobModel(work_year=WORK_YEARS_REQUEST_DICT[label]) for label in labels]
     statistic = keyword_statistic_ctl.get_work_years_statistic(jobs)
     expected_statistic = {'应届毕业生': 2, '1-3年': 1}
     self.assertDictEqual(statistic, expected_statistic)
Ejemplo n.º 4
0
 def test_get_educations_statistic(self):
     """Check get_educations_statistic: one '大专' job and two '本科' jobs."""
     labels = ('大专', '本科', '本科')
     jobs = [JobModel(education=EDUCATION_REQUEST_DICT[label]) for label in labels]
     statistic = keyword_statistic_ctl.get_educations_statistic(jobs)
     expected_statistic = {'本科': 2, '大专': 1}
     self.assertDictEqual(statistic, expected_statistic)
Ejemplo n.º 5
0
    def test_count(self):
        """Check JobModel.count without arguments and with a filter_by condition."""
        # Unfiltered count: the test expects two job rows.
        total = JobModel.count()
        self.assertEqual(total, 2)

        # filter_by on an id that is expected to exist.
        existing = JobModel.count(filter_by={'id': 4789})
        self.assertEqual(existing, 1)

        # filter_by on an id that is expected to be absent.
        missing = JobModel.count(filter_by={'id': 1})
        self.assertEqual(missing, 0)
    def test_get_city_jobs_count_statistic(self):
        """Check get_city_jobs_count_statistic with and without its second argument.

        Four jobs use city_id 2, three use city_id 3 and two use city_id 4; the
        expected city names come from data not visible in this test.
        assertDictEqual ignores ordering, so only the counts are verified.
        """
        city_ids = (2, 2, 2, 2, 3, 3, 3, 4, 4)
        jobs = [JobModel(city_id=city_id) for city_id in city_ids]

        # Without a second argument all three cities are reported.
        full_statistic = keyword_statistic_ctl.get_city_jobs_count_statistic(jobs)
        self.assertDictEqual(full_statistic, {'北京': 4, '上海': 3, '广州': 2})

        # With 2 as second argument only the two largest cities are expected
        # (presumably a limit on the number of cities returned).
        limited_statistic = keyword_statistic_ctl.get_city_jobs_count_statistic(jobs, 2)
        self.assertDictEqual(limited_statistic, {'北京': 4, '上海': 3})
Ejemplo n.º 7
0
    def test_count(self):
        """Check JobModel.count with no condition, filter_by, and filter expressions."""
        # No condition: the test expects three job rows.
        self.assertEqual(JobModel.count(), 3)

        # Keyword-style condition.
        in_city = JobModel.count(filter_by={'city_id': 4})
        self.assertEqual(in_city, 2)

        # Compound expression combined with and_.
        city_and_company = and_(JobModel.city_id == 4, JobModel.company_id == 3)
        in_city_and_company = JobModel.count(filter=(city_and_company))
        self.assertEqual(in_city_and_company, 1)

        # Single-column expression.
        by_id = JobModel.count(filter=(JobModel.id == 1))
        self.assertEqual(by_id, 1)
Ejemplo n.º 8
0
    def test_get_one(self):
        """Check JobModel.get_one with filter, filter_by, and both combined.

        Every lookup is expected to return the row equal to test_job_data.
        """
        # Lookup through a filter expression.
        by_id = JobModel.id == 4789
        found = JobModel.get_one(filter=(by_id)).dict()
        self.assertDictEqual(found, test_job_data)

        # Lookup through a compound filter expression.
        by_id_and_created = and_(JobModel.id == 4789, JobModel.created_at > 0)
        found = JobModel.get_one(filter=by_id_and_created).dict()
        self.assertDictEqual(found, test_job_data)

        # Lookup through filter_by.
        found = JobModel.get_one(filter_by={'id': 4789}).dict()
        self.assertDictEqual(found, test_job_data)

        # Lookup through filter_by and filter together.
        by_work_year = JobModel.work_year == 1
        found = JobModel.get_one(filter_by={'id': 4789}, filter=(by_work_year)).dict()
        self.assertDictEqual(found, test_job_data)
Ejemplo n.º 9
0
    def test_execute_sql_string(self):
        """Check JobModel.execute_sql_string for SELECT (with/without params) and UPDATE."""
        # Parameterised SELECT: exactly one matching row is expected.
        rows = JobModel.execute_sql_string('select id, title from job where id = :id', {'id': 4789})
        self.assertEqual(len(rows), 1)
        first_row = rows[0]
        self.assertEqual(first_row[0], 4789)
        self.assertEqual(first_row[1], u'Android开发工程师')

        # SELECT without parameters: two rows expected, job 4789 first.
        rows = JobModel.execute_sql_string('select id, title from job')
        self.assertEqual(len(rows), 2)
        first_row = rows[0]
        self.assertEqual(first_row[0], 4789)
        self.assertEqual(first_row[1], u'Android开发工程师')

        # UPDATE: the call is expected to return the number of affected rows.
        updated_count = JobModel.execute_sql_string("update job set title = '测试' where id = :id", {'id': 4789})
        self.assertEqual(updated_count, 1)
        updated_job = JobModel.get_by_pk(pk=4789)
        self.assertEqual(updated_job.title, u'测试')
Ejemplo n.º 10
0
 def test_get_salary_statistic(self):
     """Check get_salary_statistic on nine jobs with differently formatted salaries.

     The expected counts add up to 13 for 9 jobs, so a single salary is
     evidently counted in more than one bucket (presumably every bucket its
     range overlaps).
     """
     salaries = ('5k-9k', '10-15k', '15k-20k',
                 '16-18k', '20k-30k', '30k-35k',
                 '20k以上', '60k-100k', '40k-42k')
     jobs = [JobModel(salary=salary) for salary in salaries]
     statistic = keyword_statistic_ctl.get_salary_statistic(jobs)
     expected_statistic = {
         '10k及以下': 2,
         '11k-20k': 5,
         '21k-35k': 3,
         '36k-60k': 2,
         '61k以上': 1,
     }
     self.assertDictEqual(statistic, expected_statistic)
Ejemplo n.º 11
0
def crawl_lagou_job_data_suites(lagou_company_id):
    """Crawl the technology jobs of one Lagou company and store the new ones.

    Jobs flagged ``is_exist`` are skipped. Every other job is cleaned and
    converted, inserted as a ``JobModel`` row, given a ``JobExtraModel`` row
    holding its advantage and description, and linked to its keywords.
    Paging stops at the first page that yields no jobs.

    :param lagou_company_id: Lagou identifier of the company to crawl.
    :return: ``None``.
    """
    jobs_pagination = lagou_jobs_scripts.crawl_lagou_jobs_pagination(
        lagou_company_id=lagou_company_id,
        job_type=constants.LagouJobType.technology)
    # Every job of this crawl belongs to the same company, so the company row
    # is looked up once (lazily, when the first new job is seen) rather than
    # once per job.
    company_id = None
    for page_no in jobs_pagination.iter_pages:
        job_dicts = lagou_jobs_scripts.crawl_lagou_jobs(
            lagou_company_id=lagou_company_id,
            job_type=constants.LagouJobType.technology,
            page_no=page_no)
        if not job_dicts:
            break
        for job_dict in job_dicts:
            # NOTE(review): attribute access assumes job_dict is an
            # attribute-style dict rather than a plain dict - confirm.
            if job_dict.is_exist:
                continue

            lagou_jobs_scripts.clean_lagou_job_data(job_dict)
            lagou_jobs_scripts.convert_lagou_job_data(job_dict)

            if company_id is None:
                company = CompanyModel.get_one(
                    filter_by={'lagou_company_id': lagou_company_id})
                company_id = company.id
            job_dict['company_id'] = company_id

            # Entries that are stored elsewhere (or not at all) are removed
            # before the remaining fields are passed to JobModel.add.
            keywords = job_dict.pop('keywords')
            advantage = job_dict.pop('advantage')
            description = job_dict.pop('description')
            job_dict.pop('city')  # value is discarded

            job_id = JobModel.add(**job_dict)
            JobExtraModel.add(advantage=advantage,
                              description=description,
                              job_id=job_id)

            for keyword in keywords:
                keyword_ctl.insert_keyword_if_not_exist(name=keyword)
                keyword_id = keyword_ctl.get_keyword_id_by_name(
                    name=keyword)
                JobKeywordModel.add(keyword_id=keyword_id, job_id=job_id)
Ejemplo n.º 12
0
    def test_execute_sql_string(self):
        """Check JobModel.execute_sql_string for SELECT (with/without params) and UPDATE."""
        # Parameterised SELECT: exactly one matching row is expected.
        select_one_sql = 'SELECT id, title FROM job WHERE id = :id'
        rows = JobModel.execute_sql_string(select_one_sql, {'id': 1})
        self.assertEqual(len(rows), 1)
        first_row = rows[0]
        self.assertEqual(first_row[0], 1)
        self.assertEqual(first_row[1], u'高级前端开发工程师')

        # SELECT without parameters: three rows expected, job 1 first.
        rows = JobModel.execute_sql_string('SELECT id, title FROM job')
        self.assertEqual(len(rows), 3)
        first_row = rows[0]
        self.assertEqual(first_row[0], 1)
        self.assertEqual(first_row[1], u'高级前端开发工程师')

        # UPDATE: the call is expected to return the number of affected rows.
        update_sql = "UPDATE job SET title = '测试' WHERE id = :id"
        updated_count = JobModel.execute_sql_string(update_sql, {'id': 1})
        self.assertEqual(updated_count, 1)
        updated_job = JobModel.get_by_pk(pk=1)
        self.assertEqual(updated_job.title, u'测试')
Ejemplo n.º 13
0
    def test_update(self):
        """Check JobModel.update for a single row and for several rows at once.

        Verifies the returned affected-row count, that the updated row carries
        the new values and a larger updated_at, and that an unrelated row is
        untouched by the single-row update.
        """
        init_job_data_dict = JobModel.get_by_pk(pk=4789).dict()
        to_update_data_dict = dict(title=u'后端吃饭工程师',
                                   work_year=2,
                                   city_id=1,
                                   company_id=11,
                                   department='飞天面条神教招聘',
                                   salary='20k-30k',
                                   education=2,
                                   description=u'日常工作:吃饭!')

        affect_rows = JobModel.update(filter_by={'id': 4789}, values=to_update_data_dict)
        self.assertEqual(affect_rows, 1)

        # Expected state after the update: the initial row overlaid with the
        # new values. updated_at is removed because it is compared separately.
        init_updated_at = init_job_data_dict.pop('updated_at')
        expected_job_data_dict = dict(init_job_data_dict, **to_update_data_dict)

        new_job_data_dict = JobModel.get_by_pk(pk=4789).dict()
        # Subset check: overlaying the expected fields on the fetched row must
        # leave it unchanged. Replaces assertDictContainsSubset, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(dict(new_job_data_dict, **expected_job_data_dict), new_job_data_dict)
        # NOTE(review): attribute access assumes .dict() returns an
        # attribute-style dict - confirm.
        self.assertGreater(new_job_data_dict.updated_at, init_updated_at)

        # Other records are not affected.
        self.assertEqual(JobModel.get_by_pk(pk=6814).title, u'web前端')

        # Batch update.
        affect_rows = JobModel.update(filter_by={'company_id': 11}, values={'title': '测试'})
        self.assertEqual(affect_rows, 2)
        self.assertEqual(JobModel.get_by_pk(pk=6814).title, u'测试')
        self.assertEqual(JobModel.get_by_pk(pk=4789).title, u'测试')
Ejemplo n.º 14
0
 def test_add(self):
     """Check that JobModel.add returns a positive id and stores the given fields."""
     to_add_data_dict = dict(
         lg_job_id=10004,
         city_id=3,
         company_id=1,
         title='Python 开发工程师',
         work_year=5,
         department='吖吖项目组',
         salary='15k-35k',
         education=2,
         nature=1,
         description='职位介绍D',
         advantage='16薪,工作居住证,六十八险一金,双休',
     )
     job_id = JobModel.add(**to_add_data_dict)
     self.assertGreater(job_id, 0)

     stored_job_dict = JobModel.get_by_pk(pk=job_id).dict()
     # Subset check: overlaying the inserted fields on the stored row must
     # leave it unchanged. Replaces assertDictContainsSubset, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(dict(stored_job_dict, **to_add_data_dict), stored_job_dict)
Ejemplo n.º 15
0
    def test_add(self):
        """Check that JobModel.add stores the given fields and sets both timestamps."""
        to_add_data_dict = dict(title=u'后端吃饭工程师',
                                work_year=2,
                                city_id=1,
                                company_id=1,
                                department='飞天面条神教招聘',
                                salary='20k-30k',
                                education=2,
                                description=u'日常工作:吃饭!',
                                advantage='饭管饱, 管够',
                                nature=0)
        job_id = JobModel.add(**to_add_data_dict)

        self.assertGreater(job_id, 0)
        job = JobModel.get_by_pk(pk=job_id)
        stored_job_dict = job.dict()
        # Subset check: overlaying the inserted fields on the stored row must
        # leave it unchanged. Replaces assertDictContainsSubset, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(dict(stored_job_dict, **to_add_data_dict), stored_job_dict)
        self.assertGreater(job.created_at, 0)
        self.assertGreater(job.updated_at, 0)
Ejemplo n.º 16
0
    def test_list(self):
        """Check JobModel.list without arguments and with limit, offset and filter_by."""
        # No arguments: three rows expected, the first equal to test_job_dict.
        all_jobs = JobModel.list()
        self.assertEqual(len(all_jobs), 3)
        self.assertDictEqual(all_jobs[0].dict(), test_job_dict)

        # limit caps the number of returned rows.
        limited_jobs = JobModel.list(limit=1)
        self.assertEqual(len(limited_jobs), 1)

        # offset=1 leaves two of the three rows.
        remaining_jobs = JobModel.list(offset=1)
        self.assertEqual(len(remaining_jobs), 2)

        # filter_by narrows the result to the row with id 1.
        matching_jobs = JobModel.list(filter_by={'id': 1})
        self.assertEqual(len(matching_jobs), 1)
        self.assertEqual(matching_jobs[0].dict(), test_job_dict)
Ejemplo n.º 17
0
    def test_list(self):
        """Check JobModel.list with no arguments, limit, offset, filter_by and order_by."""
        # No arguments: two rows expected, the first equal to test_job_data.
        all_jobs = JobModel.list()
        self.assertEqual(len(all_jobs), 2)
        self.assertDictEqual(all_jobs[0].dict(), test_job_data)

        # limit caps the number of returned rows.
        limited_jobs = JobModel.list(limit=1)
        self.assertEqual(len(limited_jobs), 1)

        # offset=1 leaves one of the two rows.
        remaining_jobs = JobModel.list(offset=1)
        self.assertEqual(len(remaining_jobs), 1)

        # filter_by on an id that is expected to exist.
        matching_jobs = JobModel.list(filter_by={'id': 4789})
        self.assertEqual(len(matching_jobs), 1)
        self.assertEqual(matching_jobs[0].dict(), test_job_data)

        # filter_by on an id that is expected to be absent.
        no_jobs = JobModel.list(filter_by={'id': 1})
        self.assertEqual(len(no_jobs), 0)

        # order_by with descending id: test_job_data is expected in second place.
        descending_jobs = JobModel.list(order_by=JobModel.id.desc())
        self.assertEqual(descending_jobs[1].dict(), test_job_data)
Ejemplo n.º 18
0
    def test_update(self):
        """Check JobModel.update for a single row and for several rows at once.

        Verifies the returned affected-row count, that the updated row carries
        the new values and a larger updated_at, and that an unrelated row is
        untouched by the single-row update.
        """
        init_job_data_dict = JobModel.get_by_pk(pk=1).dict()
        to_update_data_dict = dict(title=u'后端吃饭工程师',
                                   work_year=1,
                                   city_id=1,
                                   company_id=1,
                                   department='飞天面条神教招聘',
                                   salary='20k-32k',
                                   education=2,
                                   description=u'日常工作:吃饭!')

        affect_rows = JobModel.update(filter_by={'id': 1},
                                      values=to_update_data_dict)
        self.assertEqual(affect_rows, 1)

        # Expected state after the update: the initial row overlaid with the
        # new values. updated_at is removed because it is compared separately.
        init_updated_at = init_job_data_dict.pop('updated_at')
        predictive_job_data_dict = dict(init_job_data_dict, **to_update_data_dict)

        new_job_data_dict = JobModel.get_by_pk(pk=1).dict()
        # Subset check: overlaying the expected fields on the fetched row must
        # leave it unchanged. Replaces assertDictContainsSubset, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(dict(new_job_data_dict, **predictive_job_data_dict),
                         new_job_data_dict)
        # NOTE(review): attribute access assumes .dict() returns an
        # attribute-style dict - confirm.
        self.assertGreater(new_job_data_dict.updated_at, init_updated_at)

        # Other records are not affected.
        self.assertEqual(JobModel.get_by_pk(pk=2).title, u'前端开发工程师')

        # Batch update.
        affect_rows = JobModel.update(filter_by={'city_id': 4},
                                      values={'title': '测试'})
        self.assertEqual(affect_rows, 2)
        jobs = JobModel.list(filter_by={'city_id': 4})
        self.assertTrue(all(job.title == u'测试' for job in jobs))
Ejemplo n.º 19
0
    def test_batch_add(self):
        """Check JobModel.batch_add with a mixed-model list and with a valid list.

        A list containing an instance of another model must raise ValueError
        and leave the job count unchanged; a list of JobModel instances must
        raise the count by its length.
        """
        def build_jobs():
            # Fresh instances on every call, as each batch needs its own objects.
            return [
                JobModel(title='招聘资深前端工程师',
                         city_id=1,
                         company_id=2,
                         lg_job_id=100056),
                JobModel(title='招聘资深中端工程师',
                         city_id=1,
                         company_id=2,
                         lg_job_id=100055),
            ]

        jobs_count_before = JobModel.count()

        # An instance of a different model class is mixed into the batch.
        mixed_instances = [CityModel(name='你好')] + build_jobs()
        with self.assertRaises(ValueError):
            JobModel.batch_add(mixed_instances)
        self.assertEqual(JobModel.count(), jobs_count_before)

        # A batch made only of JobModel instances is inserted completely.
        job_instances = build_jobs()
        JobModel.batch_add(job_instances)
        self.assertEqual(JobModel.count(), jobs_count_before + 2)
Ejemplo n.º 20
0
 def test_is_exist(self):
     """Check that JobModel.is_exist reports True for the job with id 1."""
     id_is_one = JobModel.id == 1
     found = JobModel.is_exist(filter=(id_is_one))
     self.assertEqual(found, True)
Ejemplo n.º 21
0
 def test_model_to_dict(self):
     """Check that .dict() on job 4789 yields a dict equal to test_job_data."""
     job = JobModel.get_by_pk(pk=4789).dict()
     # assertIsInstance reports the offending object and type on failure,
     # unlike assertTrue(isinstance(...)).
     self.assertIsInstance(job, dict)
     self.assertDictEqual(job, test_job_data)
Ejemplo n.º 22
0
 def test_get_by_pk(self):
     """Fetch job 4789 by primary key and compare its dict form with test_job_data."""
     fetched_job_dict = JobModel.get_by_pk(pk=4789).dict()
     self.assertDictEqual(fetched_job_dict, test_job_data)
Ejemplo n.º 23
0
    def test_get_one(self):
        """Check that JobModel.get_one finds job 1 through filter_by and through filter."""
        # Keyword-style condition.
        found_by_keyword = JobModel.get_one(filter_by={'id': 1}).dict()
        self.assertDictEqual(found_by_keyword, test_job_dict)

        # Column expression condition.
        found_by_expression = JobModel.get_one(filter=(JobModel.id == 1)).dict()
        self.assertDictEqual(found_by_expression, test_job_dict)
Ejemplo n.º 24
0
 def test_update_by_pk(self):
     """Update the title of job 6814 by primary key and read it back."""
     new_values = {'title': '你好啊啊'}
     updated_count = JobModel.update_by_pk(pk=6814, values=new_values)
     self.assertEqual(updated_count, 1)
     reloaded_job = JobModel.get_by_pk(pk=6814)
     self.assertEqual(reloaded_job.title, u'你好啊啊')