def test_occupational_scoped_skill_extractor_candidate_skills():
    """The SOC-scoped extractor built from the sample ontology should find
    'organization' in the sample posting, with its full surrounding context
    and 100% confidence.
    """
    extractor = SocScopedExactMatchSkillExtractor(sample_ontology())
    ranked = sorted(
        extractor.candidate_skills(sample_job_posting()),
        key=lambda candidate: candidate.skill_name,
    )
    top = ranked[0]
    assert top.skill_name == 'organization'
    assert top.context == (
        'Organization, Cleanliness, Trainability, team player, '
        'good communication skillz, Motivation, a Sense of '
        'Responsibility and Pride in your Performance'
    )
    assert top.confidence == 100
def test_occupational_scoped_skill_extractor_candidate_skills():
    """Build the SOC-scoped extractor from a tab-separated skill lookup file
    and check that 'organization' is found in the sample posting with its
    full context and 100% confidence.
    """
    with utils.makeNamedTemporaryCSV(sample_skills(), '\t') as skills_filename:
        extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        ranked = sorted(
            extractor.candidate_skills(sample_job_posting()),
            key=lambda candidate: candidate.skill_name,
        )
        top = ranked[0]
        assert top.skill_name == 'organization'
        assert top.context == (
            'Organization, Cleanliness, Trainability, team player, '
            'good communication skillz, Motivation, a Sense of '
            'Responsibility and Pride in your Performance'
        )
        assert top.confidence == 100
def _compute_func_on_one(self):
    """Return a function that maps one job posting to its extracted skills.

    The returned callable transforms the posting into a plain-text document,
    runs the SOC-scoped exact-match skill extractor against it, and yields a
    dict of ``{self.property_name: [skill, skill, ...]}`` where each skill
    name appears once per counted occurrence.
    """
    corpus_creator = SimpleCorpusCreator()
    skill_extractor = SocScopedExactMatchSkillExtractor(
        skill_lookup_path=self.skill_lookup_path)

    def func(job_posting):
        # Postings without an O*NET SOC code fall back to the catch-all code.
        skill_counts = skill_extractor.document_skill_counts(
            soc_code=job_posting.get('onet_soc_code', '99-9999.00'),
            document=corpus_creator._transform(job_posting))
        # Repeat each skill name once per occurrence,
        # e.g. {'sql': 2} -> ['sql', 'sql'].
        repeated_skills = [
            skill_name
            for skill_name, occurrences in skill_counts.items()
            for _ in range(occurrences)
        ]
        return {self.property_name: repeated_skills}

    return func
def test_occupation_scoped_freetext_skill_extractor():
    """Skill matches should only count when the document's SOC code matches
    the occupation the skill is scoped to; unknown or missing SOC codes
    produce no matches.
    """
    header = ['', 'O*NET-SOC Code', 'Element ID', 'ONET KSA', 'Description',
              'skill_uuid', 'nlp_a']
    rows = [
        ['1', '11-1011.00', '2.a.1.a', 'reading comprehension', '...',
         '2c77c703bd66e104c78b1392c3203362', 'reading comprehension'],
        ['2', '11-1011.00', '2.a.1.b', 'active listening', '...',
         'a636cb69257dcec699bce4f023a05126', 'active listening'],
    ]
    with utils.makeNamedTemporaryCSV([header] + rows, '\t') as skills_filename:
        extractor = SocScopedExactMatchSkillExtractor(
            skill_lookup_path=skills_filename)
        # (soc_code, document, expected skill counts)
        cases = [
            ('11-1011.00', 'this is a job that needs active listening',
             Counter({'active listening': 1})),
            ('11-1011.00', 'this is a reading comprehension job',
             Counter({'reading comprehension': 1})),
            ('11-1011.00', 'this is an active and reading listening job',
             Counter()),
            ('11-1011.00',
             'this is a reading comprehension and active listening job',
             Counter({'active listening': 1, 'reading comprehension': 1})),
            ('11-1021.00', 'this is a job that needs active listening',
             Counter()),
            ('11-1021.00', 'this is a reading comprehension job',
             Counter()),
            ('11-1021.00', 'this is an active and reading listening job',
             Counter()),
            ('11-1021.00',
             'this is a reading comprehension and active listening job',
             Counter()),
            (None, 'this is a job that needs active listening', Counter()),
            (None, 'this is a reading comprehension job', Counter()),
            (None, 'this is an active and reading listening job', Counter()),
            (None, 'this is a reading comprehension and active listening job',
             Counter()),
        ]
        for soc_code, document, expected in cases:
            assert extractor.document_skill_counts(
                soc_code=soc_code, document=document) == expected
def test_occupation_scoped_freetext_skill_extractor():
    """Skill matches drawn from a CompetencyOntology should only count when
    the posting's SOC code matches the occupation the competency is linked
    to; other or missing SOC codes produce no matches.
    """
    ontology = CompetencyOntology(
        competency_name='Sample Framework',
        competency_description='A few basic competencies',
        edges=[
            CompetencyOccupationEdge(
                competency=Competency(identifier='2.a.1.a',
                                      name='Reading Comprehension'),
                occupation=Occupation(identifier='11-1011.00')),
            CompetencyOccupationEdge(
                competency=Competency(identifier='2.a.1.b',
                                      name='Active Listening'),
                occupation=Occupation(identifier='11-1011.00')),
        ])
    extractor = SocScopedExactMatchSkillExtractor(ontology)
    # (posting id, soc code, description, expected skill counts)
    specs = [
        ('1234', '11-1011.00', 'this is a job that needs active listening',
         Counter({'active listening': 1})),
        ('2234', '11-1011.00', 'this is a reading comprehension job',
         Counter({'reading comprehension': 1})),
        ('3234', '11-1011.00', 'this is an active and reading listening job',
         Counter()),
        ('4234', '11-1011.00',
         'this is a reading comprehension and active listening job',
         Counter({'active listening': 1, 'reading comprehension': 1})),
        ('5234', '11-1021.00', 'this is a job that needs active listening',
         Counter()),
        ('6234', '11-1021.00', 'this is a reading comprehension job',
         Counter()),
        ('7234', '11-1021.00', 'this is an active and reading listening job',
         Counter()),
        ('8234', '11-1021.00',
         'this is a reading comprehension and active listening job',
         Counter()),
        ('9234', None, 'this is a job that needs active listening',
         Counter()),
        ('1334', None, 'this is a reading comprehension job', Counter()),
        ('1434', None, 'this is an active and reading listening job',
         Counter()),
        ('1534', None,
         'this is a reading comprehension and active listening job',
         Counter()),
    ]
    for posting_id, soc_code, description, expected in specs:
        document = {
            'id': posting_id,
            '@type': 'JobPosting',
            'onet_soc_code': soc_code,
            'description': description,
            'expected_value': expected,
        }
        assert extractor.document_skill_counts(document) == expected
from skills_ml.evaluation.skill_extractors import candidate_skills_from_sample, metrics_for_candidate_skills from skills_ml.evaluation.skill_extraction_metrics import TotalOccurrences, TotalVocabularySize, OntologyCompetencyRecall from skills_ml.job_postings.common_schema import JobPostingCollectionSample from tests.utils import sample_factory sample = sample_factory(JobPostingCollectionSample()) print('Building ONET, may take a while to download') full_onet = Onet() skill_extractors = [ SectionExtractSkillExtractor(), SkillEndingPatternExtractor(only_bulleted_lines=False), AbilityEndingPatternExtractor(only_bulleted_lines=False), FuzzyMatchSkillExtractor(full_onet.competency_framework), ExactMatchSkillExtractor(full_onet.competency_framework), SocScopedExactMatchSkillExtractor(full_onet) ] print('Done building ONET! Now subsetting ONET into K,S,A') metric_ontologies = [ full_onet, full_onet.filter_by(lambda edge: 'Knowledge' in edge.competency.categories, competency_name='onet_knowledge', competency_description='ONET Knowledge'), full_onet.filter_by(lambda edge: 'Abilities' in edge.competency.categories, competency_name='onet_ability', competency_description='ONET Ability'), full_onet.filter_by(lambda edge: 'Skills' in edge.competency.categories, competency_name='onet_skill', competency_description='ONET Skill') ] metrics = [