# Example #1
def test_load_geo_title_counts():
    """Load geo/title counts for two quarters and verify table row counts."""
    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        ensure_db(engine)

        def ingest(year, quarter):
            # Write the sample rows to a temporary CSV and load that quarter.
            with utils.makeNamedTemporaryCSV(sample_counts) as fname:
                load_geo_title_counts(fname, year, quarter, engine)

        ingest(2014, 2)
        session = sessionmaker(engine)()

        # first load creates every geography and a single quarter
        assert session.query(GeoTitleCount).count() == 8
        assert session.query(Geography).count() == 7
        assert session.query(Quarter).count() == 1

        # a different quarter doubles the counts but reuses geographies
        ingest(2014, 3)
        assert session.query(GeoTitleCount).count() == 16
        assert session.query(Geography).count() == 7
        assert session.query(Quarter).count() == 2

        # re-loading an already-loaded quarter overwrites instead of appending
        ingest(2014, 3)
        assert session.query(GeoTitleCount).count() == 16
        assert session.query(Geography).count() == 7
        assert session.query(Quarter).count() == 2
def test_NormalizerResponse():
    """Exercise NormalizerResponse ranking over a fake normalizer."""
    # ranked_rows involves randomness; pin the seed so ordering is stable
    random.seed(266)

    class TestNormalizerResponse(normalizer_evaluation.NormalizerResponse):
        def normalize(self, job_title):
            # Fabricate five candidate titles with a constant relevance score.
            return [
                {'title': '{} {}'.format(job_title, i), 'relevance_score': 0.5}
                for i in range(5)
            ]

        def _good_response(self, response):
            # Accept every response so all rows are ranked.
            return True

    rows = [
        ['Cupcake Ninja', 'a baker', '1234'],
        ['Oyster Floater', 'a person that floats oysters', '2345'],
    ]

    with makeNamedTemporaryCSV(rows, '\t') as csvname:
        evaluator = TestNormalizerResponse(
            name='test normalizer',
            access=csvname
        )
        observed = []
        for response in evaluator:
            observed.extend(evaluator.ranked_rows(response))
        assert observed == [
            ('Cupcake Ninja', 'a baker', 'Cupcake Ninja 1', 1),
            ('Cupcake Ninja', 'a baker', 'Cupcake Ninja 2', 2),
            ('Cupcake Ninja', 'a baker', 'Cupcake Ninja 0', 0),
            ('Oyster Floater', 'a person that floats oysters', 'Oyster Floater 2', 2),
            ('Oyster Floater', 'a person that floats oysters', 'Oyster Floater 0', 0),
            ('Oyster Floater', 'a person that floats oysters', 'Oyster Floater 1', 1)
        ]
# Example #3
def test_exactmatch_skill_extractor():
    """ExactMatchSkillExtractor counts only exact skill-name phrases."""
    rows = [
        ['', 'O*NET-SOC Code', 'Element ID', 'ONET KSA', 'Description',
         'skill_uuid', 'nlp_a'],
        ['1', '11-1011.00', '2.a.1.a', 'reading comprehension', '...',
         '2c77c703bd66e104c78b1392c3203362', 'reading comprehension'],
        ['2', '11-1011.00', '2.a.1.b', 'active listening', '...',
         'a636cb69257dcec699bce4f023a05126', 'active listening'],
    ]
    documents = [
        'this is a job that needs active listening',
        'this is a reading comprehension job',
        'this is an active and reading listening job',
        'this is a reading comprehension and active listening job',
    ]
    # scrambled word order ("active ... reading listening") must not match
    expected = [
        Counter({'active listening': 1}),
        Counter({'reading comprehension': 1}),
        Counter(),
        Counter({
            'active listening': 1,
            'reading comprehension': 1
        })
    ]
    with utils.makeNamedTemporaryCSV(rows, '\t') as skills_filename:
        extractor = ExactMatchSkillExtractor(skill_lookup_path=skills_filename)
        observed = [extractor.document_skill_counts(doc) for doc in documents]
        assert observed == expected
# Example #4
 def ensure_file(self, dataset):
     """Yield the path of a temp CSV holding canned content for *dataset*."""
     # dataset filename -> fake tab-separated content defined at module level
     contents = {
         'Skills.txt': skills_content,
         'Abilities.txt': abilities_content,
         'Knowledge.txt': knowledge_content,
     }
     with utils.makeNamedTemporaryCSV(contents[dataset], '\t') as temp:
         yield temp
# Example #5
 def ensure_file(self, dataset):
     """Yield the path of a temp CSV holding canned content for *dataset*."""
     # dataset filename -> fake tab-separated content defined at module level
     contents = {
         'Sample of Reported Titles.txt': sample_content,
         'Occupation Data.txt': occupation_content,
         'Alternate Titles.txt': alternate_title_content,
     }
     with utils.makeNamedTemporaryCSV(contents[dataset], '\t') as temp:
         yield temp
# Example #6
 def ensure_file(self, dataset):
     """Yield the path of a temp CSV holding canned content for *dataset*."""
     # dataset filename -> fake tab-separated content defined at module level
     contents = {
         'Skills.txt': skills_content,
         'Abilities.txt': abilities_content,
         'Knowledge.txt': knowledge_content,
         'Tools and Technology.txt': tools_content,
         'Content Model Reference.txt': cmr_content,
     }
     with utils.makeNamedTemporaryCSV(contents[dataset], '\t') as temp:
         yield temp
# Example #7
def test_occupational_scoped_skill_extractor_candidate_skills():
    """The SOC-scoped extractor finds 'organization' in the sample posting."""
    with utils.makeNamedTemporaryCSV(sample_skills(), '\t') as skills_filename:
        extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        found = extractor.candidate_skills(sample_job_posting())
        # sort by skill name so the first candidate is deterministic
        candidates = sorted(found, key=lambda candidate: candidate.skill_name)

        first = candidates[0]
        assert first.skill_name == 'organization'
        assert first.context == 'Organization, Cleanliness, Trainability, team player, good communication skillz, Motivation, a Sense of Responsibility and Pride in your Performance'
        assert first.confidence == 100
# Example #8
def test_load_jobs_master():
    """Loading jobs master then alternate titles yields one alternate-title row."""
    def num_rows():
        # Count all rows currently in jobs_alternate_titles.
        return len([row for row in engine.execute('select * from jobs_alternate_titles')])

    with testing.postgresql.Postgresql() as postgresql:
        engine = create_engine(postgresql.url())
        ensure_db(engine)
        with utils.makeNamedTemporaryCSV(sample_input, separator='\t') as fname:
            load_jobs_master(fname, engine)
            load_alternate_titles(fname, engine)
            # we only want the non-occupation row
            assert num_rows() == 1
        # NOTE(review): this nested `download` definition takes `self` and is
        # never called from this test — it looks like a method from another
        # example accidentally pasted here. Confirm intent before removing.
        def download(self, source_file):
            # source file name -> fake content defined at module level
            fake_data_lookup = {
                'Skills': skills_content,
                'Abilities': abilities_content,
                'Knowledge': knowledge_content,
                'Tools and Technology': tools_content,
            }

            with utils.makeNamedTemporaryCSV(fake_data_lookup[source_file],
                                             '\t') as tempname:
                with open(tempname) as fh:
                    return fh.read()
def test_load_jobs_unusual_titles():
    """Unusual titles load one row linked to an existing jobs-master record."""
    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        ensure_db(engine)
        session = sessionmaker(engine)()
        with makeNamedTemporaryCSV(sample_titles, separator='\t') as fname:
            # the loader needs a matching jobs-master row to attach titles to
            session.add(JobMaster('abcd', '41-2031.00', '', '', '', ''))
            session.commit()
            load_jobs_unusual_titles(fname, engine)
        assert session.query(JobUnusualTitle).count() == 1
        matching = (
            session.query(JobUnusualTitle)
            .filter_by(job_uuid='abcd')
            .count()
        )
        assert matching == 1
# Example #11
def test_load_skills_master():
    """skills_master loads 8 rows and re-loading updates matched rows in place."""
    def count_rows():
        # Count all rows currently in skills_master.
        return len(list(engine.execute('select * from skills_master')))

    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        ensure_db(engine)
        with utils.makeNamedTemporaryCSV(sample_input, separator='\t') as fname:
            load_skills_master(fname, engine)
            assert count_rows() == 8

        # a new run with updated data should update the matched row, not insert
        with utils.makeNamedTemporaryCSV(new_input, separator='\t') as fname:
            load_skills_master(fname, engine)
            assert count_rows() == 8
            updated_skill_desc_query = '''
select description
from skills_master
where uuid = '2c77c703bd66e104c78b1392c3203362'
'''
            descriptions = [
                row[0] for row
                in engine.execute(updated_skill_desc_query)
            ]
            assert descriptions[0] == 'an updated description'
# Example #12
def test_load_jobs_master():
    """jobs_master loads 3 rows; a duplicated SOC keeps the first title seen."""
    def count_rows():
        # Count all rows currently in jobs_master.
        return len(list(engine.execute('select * from jobs_master')))

    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        ensure_db(engine)
        with utils.makeNamedTemporaryCSV(sample_input,
                                         separator='\t') as fname:
            load_jobs_master(fname, engine)
            assert count_rows() == 3
            # the occupation (first version) of Chief Executives should be used
            title_query = '''
select title
from jobs_master
where uuid = 'e4063de16cae5cf29207ca572e3a891d'
'''
            titles = [row[0] for row in engine.execute(title_query)]
            assert titles[0] == 'Chief Executives'
def test_fuzzymatch_skill_extractor_candidate_skills():
    """FuzzyMatchSkillExtractor surfaces near-matches with confidence scores."""
    skills_sentence = 'Organization, Cleanliness, Trainability, team player, good communication skillz, Motivation, a Sense of Responsibility and Pride in your Performance'
    # (skill_name, context, confidence) for the first three candidates,
    # sorted by skill name; 'skillz' fuzzy-matches at 95
    expected = [
        ('communication skills', skills_sentence, 95),
        ('cooking',
         'One-two years cooking experience in a professional kitchen', 100),
        ('organization', skills_sentence, 100),
    ]
    with utils.makeNamedTemporaryCSV(sample_skills(), '\t') as skills_filename:
        extractor = FuzzyMatchSkillExtractor(skill_lookup_path=skills_filename)
        candidates = sorted(
            extractor.candidate_skills(sample_job_posting()),
            key=lambda candidate: candidate.skill_name,
        )
        for candidate, (name, context, confidence) in zip(candidates, expected):
            assert candidate.skill_name == name
            assert candidate.context == context
            assert candidate.confidence == confidence
def test_skills_importance():
    """Four KSA rows collapse into two skills_importance rows with LV and IM."""
    with testing.postgresql.Postgresql() as pg_server:
        engine = create_engine(pg_server.url())
        ensure_db(engine)
        session = sessionmaker(engine)()
        # this task depends on jobs and skills master being loaded,
        # so add the needed rows first
        session.add(JobMaster('job_uuid', '11-1011.00', '', '', '', ''))
        session.add(SkillMaster(uuid='skill_uuid1'))
        session.add(SkillMaster(uuid='skill_uuid2'))
        session.commit()
        with utils.makeNamedTemporaryCSV(sample_ksas, separator='\t') as fname:
            load_skills_importance(fname, engine)
            # LV and IM rows for the same skill are merged into a single row
            results = list(engine.execute('select * from skills_importance'))
            assert len(results) == 2
            assert results == [('job_uuid', 'skill_uuid1', 4.75, 4.12),
                               ('job_uuid', 'skill_uuid2', 4.88, 4.12)]
# Example #15
def test_occupation_scoped_freetext_skill_extractor():
    """Skill counts appear only for documents scoped to the skills' SOC code."""
    rows = [
        ['', 'O*NET-SOC Code', 'Element ID', 'ONET KSA', 'Description',
         'skill_uuid', 'nlp_a'],
        ['1', '11-1011.00', '2.a.1.a', 'reading comprehension', '...',
         '2c77c703bd66e104c78b1392c3203362', 'reading comprehension'],
        ['2', '11-1011.00', '2.a.1.b', 'active listening', '...',
         'a636cb69257dcec699bce4f023a05126', 'active listening'],
    ]
    # expected counts when the document IS scoped to the lookup's SOC code;
    # insertion order matters so the call sequence matches the original test
    in_scope_expectations = {
        'this is a job that needs active listening':
            Counter({'active listening': 1}),
        'this is a reading comprehension job':
            Counter({'reading comprehension': 1}),
        'this is an active and reading listening job': Counter(),
        'this is a reading comprehension and active listening job':
            Counter({
                'active listening': 1,
                'reading comprehension': 1
            }),
    }
    with utils.makeNamedTemporaryCSV(rows, '\t') as skills_filename:
        extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        for soc_code in ('11-1011.00', '11-1021.00', None):
            for document, in_scope_expected in in_scope_expectations.items():
                # a different SOC code (or none) must yield no counts at all
                expected = (
                    in_scope_expected
                    if soc_code == '11-1011.00'
                    else Counter()
                )
                assert extractor.document_skill_counts(
                    soc_code=soc_code,
                    document=document) == expected
 def download(self, *args):
     # Test double for a downloader: hands back the path of a temp CSV
     # seeded with module-level ``occupation_content``; extra args ignored.
     # NOTE(review): the path is returned from inside the context manager,
     # which presumably deletes the temp file on exit — the caller may
     # receive a path that no longer exists. Confirm against
     # utils.makeNamedTemporaryCSV's cleanup behavior.
     with utils.makeNamedTemporaryCSV(occupation_content, '\t') as temp:
         return temp