Esempio n. 1
0
def test_occupational_scoped_skill_extractor_candidate_skills():
    """Check the alphabetically-first candidate skill found in the sample posting.

    The extractor is built from the sample ontology; its candidates are
    ordered by ``skill_name`` so the assertion target is deterministic.
    """
    skill_extractor = SocScopedExactMatchSkillExtractor(sample_ontology())
    found = list(skill_extractor.candidate_skills(sample_job_posting()))
    found.sort(key=lambda candidate: candidate.skill_name)

    # Only the first candidate (after sorting by name) is inspected.
    first = found[0]
    expected_context = 'Organization, Cleanliness, Trainability, team player, good communication skillz, Motivation, a Sense of Responsibility and Pride in your Performance'

    assert first.skill_name == 'organization'
    assert first.context == expected_context
    assert first.confidence == 100
Esempio n. 2
0
def test_occupational_scoped_skill_extractor_candidate_skills():
    """Check the alphabetically-first candidate skill using a CSV skill lookup.

    The sample skills are written to a temporary tab-delimited file, which
    is handed to the extractor through ``skill_lookup_path``.
    """
    expected_context = 'Organization, Cleanliness, Trainability, team player, good communication skillz, Motivation, a Sense of Responsibility and Pride in your Performance'

    with utils.makeNamedTemporaryCSV(sample_skills(), '\t') as skills_filename:
        skill_extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        found = list(skill_extractor.candidate_skills(sample_job_posting()))
        # Order by skill name so index 0 is deterministic.
        found.sort(key=lambda candidate: candidate.skill_name)
        first = found[0]

        assert first.skill_name == 'organization'
        assert first.context == expected_context
        assert first.confidence == 100
Esempio n. 3
0
    def _compute_func_on_one(self):
        """Build the callable that computes this property for one job posting.

        Returns:
            A function taking a job posting (a mapping) and returning
            ``{self.property_name: names}``, where ``names`` lists every
            counted skill name repeated once per occurrence.
        """
        corpus_creator = SimpleCorpusCreator()
        skill_extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=self.skill_lookup_path)

        def func(job_posting):
            # Postings lacking 'onet_soc_code' fall back to '99-9999.00'.
            soc_code = job_posting.get('onet_soc_code', '99-9999.00')
            text = corpus_creator._transform(job_posting)
            skill_counts = skill_extractor.document_skill_counts(
                soc_code=soc_code, document=text)

            # Expand {name: count} into a flat list with `count` copies of name.
            flattened = []
            for skill_name, occurrences in skill_counts.items():
                flattened.extend([skill_name] * occurrences)
            return {self.property_name: flattened}

        return func
Esempio n. 4
0
def test_occupation_scoped_freetext_skill_extractor():
    """Verify skill counts per SOC code using a CSV-backed skill lookup.

    The lookup lists two skills under SOC code '11-1011.00'. The test expects
    them to be counted only for documents submitted with that SOC code, and
    expects an empty ``Counter`` for a different code or ``None``.
    """
    header = [
        '', 'O*NET-SOC Code', 'Element ID', 'ONET KSA', 'Description',
        'skill_uuid', 'nlp_a'
    ]
    reading_row = [
        '1', '11-1011.00', '2.a.1.a', 'reading comprehension', '...',
        '2c77c703bd66e104c78b1392c3203362', 'reading comprehension'
    ]
    listening_row = [
        '2', '11-1011.00', '2.a.1.b', 'active listening', '...',
        'a636cb69257dcec699bce4f023a05126', 'active listening'
    ]
    content = [header, reading_row, listening_row]

    texts = (
        'this is a job that needs active listening',
        'this is a reading comprehension job',
        'this is an active and reading listening job',
        'this is a reading comprehension and active listening job',
    )
    # Expected counts for each text when the SOC code matches the lookup rows.
    matching_soc_counts = (
        Counter({'active listening': 1}),
        Counter({'reading comprehension': 1}),
        Counter(),
        Counter({'active listening': 1, 'reading comprehension': 1}),
    )

    # (soc_code, document, expected) triples, in the order they are checked.
    cases = [('11-1011.00', text, counts)
             for text, counts in zip(texts, matching_soc_counts)]
    # Any other SOC code, or none at all, is expected to yield nothing.
    cases.extend((soc_code, text, Counter())
                 for soc_code in ('11-1021.00', None)
                 for text in texts)

    with utils.makeNamedTemporaryCSV(content, '\t') as skills_filename:
        extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        for soc_code, text, expected in cases:
            observed = extractor.document_skill_counts(soc_code=soc_code,
                                                       document=text)
            assert observed == expected
Esempio n. 5
0
def test_occupation_scoped_freetext_skill_extractor():
    """Verify skill counts per SOC code using an ontology-backed extractor.

    The ontology links two competencies to occupation '11-1011.00'. The test
    expects them to be counted only for postings whose ``onet_soc_code`` is
    that occupation, and expects an empty ``Counter`` for a different code
    or ``None``.
    """
    competencies = (
        ('2.a.1.a', 'Reading Comprehension'),
        ('2.a.1.b', 'Active Listening'),
    )
    edges = [
        CompetencyOccupationEdge(
            competency=Competency(identifier=identifier, name=name),
            occupation=Occupation(identifier='11-1011.00'))
        for identifier, name in competencies
    ]
    ontology = CompetencyOntology(
        competency_name='Sample Framework',
        competency_description='A few basic competencies',
        edges=edges)
    extractor = SocScopedExactMatchSkillExtractor(ontology)

    def posting(posting_id, soc_code, description, expected):
        # The whole dict (including 'expected_value') is handed to the
        # extractor, so keys and their order are kept fixed here.
        return {
            'id': posting_id,
            '@type': 'JobPosting',
            'onet_soc_code': soc_code,
            'description': description,
            'expected_value': expected,
        }

    needs_listening = 'this is a job that needs active listening'
    reading_job = 'this is a reading comprehension job'
    scrambled = 'this is an active and reading listening job'
    both_skills = 'this is a reading comprehension and active listening job'

    postings = [
        # SOC code present in the ontology: matching skills are counted.
        posting('1234', '11-1011.00', needs_listening,
                Counter({'active listening': 1})),
        posting('2234', '11-1011.00', reading_job,
                Counter({'reading comprehension': 1})),
        posting('3234', '11-1011.00', scrambled, Counter()),
        posting('4234', '11-1011.00', both_skills,
                Counter({'active listening': 1, 'reading comprehension': 1})),
        # SOC code absent from the ontology: nothing is expected.
        posting('5234', '11-1021.00', needs_listening, Counter()),
        posting('6234', '11-1021.00', reading_job, Counter()),
        posting('7234', '11-1021.00', scrambled, Counter()),
        posting('8234', '11-1021.00', both_skills, Counter()),
        # No SOC code at all: nothing is expected.
        posting('9234', None, needs_listening, Counter()),
        posting('1334', None, reading_job, Counter()),
        posting('1434', None, scrambled, Counter()),
        posting('1534', None, both_skills, Counter()),
    ]

    for job_posting in postings:
        observed = extractor.document_skill_counts(job_posting)
        assert observed == job_posting['expected_value']
Esempio n. 6
0
from skills_ml.evaluation.skill_extractors import candidate_skills_from_sample, metrics_for_candidate_skills
from skills_ml.evaluation.skill_extraction_metrics import TotalOccurrences, TotalVocabularySize, OntologyCompetencyRecall
from skills_ml.job_postings.common_schema import JobPostingCollectionSample
from tests.utils import sample_factory

# Script setup: build the job posting sample, the ONET ontology, the skill
# extractors under evaluation, and the ontologies used for metrics.
sample = sample_factory(JobPostingCollectionSample())
# Per the message below, constructing Onet() may involve a slow download.
print('Building ONET, may take a while to download')
full_onet = Onet()

# Extractors to evaluate. The fuzzy/exact matchers are given only the
# competency framework, while the SOC-scoped matcher is given the full ontology.
skill_extractors = [
    SectionExtractSkillExtractor(),
    SkillEndingPatternExtractor(only_bulleted_lines=False),
    AbilityEndingPatternExtractor(only_bulleted_lines=False),
    FuzzyMatchSkillExtractor(full_onet.competency_framework),
    ExactMatchSkillExtractor(full_onet.competency_framework),
    SocScopedExactMatchSkillExtractor(full_onet)
]
print('Done building ONET! Now subsetting ONET into K,S,A')
# The full ontology plus three subsets, each keeping only the edges whose
# competency carries the named category ('Knowledge', 'Abilities', 'Skills').
metric_ontologies = [
    full_onet,
    full_onet.filter_by(lambda edge: 'Knowledge' in edge.competency.categories,
                        competency_name='onet_knowledge',
                        competency_description='ONET Knowledge'),
    full_onet.filter_by(lambda edge: 'Abilities' in edge.competency.categories,
                        competency_name='onet_ability',
                        competency_description='ONET Ability'),
    full_onet.filter_by(lambda edge: 'Skills' in edge.competency.categories,
                        competency_name='onet_skill',
                        competency_description='ONET Skill')
]
metrics = [