def ontology_from_candidate_skills(candidate_skills: CandidateSkillYielder, skill_extractor_name: str='unknown') -> CompetencyOntology:
    """Build a competency ontology out of a stream of candidate skills.

    Each candidate skill is turned into a competency (identified by its
    lowercased skill name) and linked by an edge to the occupation that
    ``get_onet_occupation`` reports for the candidate's source object.
    A given competency is handled at most once per document id.

    Args:
        candidate_skills: iterable of CandidateSkill objects; the attributes
            ``skill_name``, ``document_id`` and ``source_object`` are read.
        skill_extractor_name: label of the skill extractor that produced the
            candidates; embedded in the ontology name and description.

    Returns:
        The populated CompetencyOntology.
    """
    ontology_label = f'candidate_skill_{skill_extractor_name}'
    description = f'Constructed from CandidateSkill objects produced by the {skill_extractor_name} skill extractor'
    ontology = CompetencyOntology(
        name=ontology_label,
        competency_name=ontology_label,
        competency_description=description,
    )

    # Competencies already processed, grouped by the document they came from.
    seen_by_document = defaultdict(set)

    for candidate in candidate_skills:
        competency = Competency(
            identifier=candidate.skill_name.lower(),
            name=candidate.skill_name,
        )
        seen_in_document = seen_by_document[candidate.document_id]
        if competency in seen_in_document:
            # This document already contributed this competency.
            continue
        seen_in_document.add(competency)

        if competency not in ontology.competencies:
            ontology.add_competency(competency)

        occupation = Occupation(
            identifier=get_onet_occupation(candidate.source_object)
        )
        if occupation not in ontology.occupations:
            ontology.add_occupation(occupation)

        ontology.add_edge(occupation=occupation, competency=competency)

    return ontology
def eval(self, candidate_skills: CandidateSkillYielder, sample_len: int) -> float:
    """Return the ratio of distinct found occupations to the lookup size.

    For every candidate skill, ``get_onet_occupation`` is called on its
    ``source_object``; distinct truthy results are collected and their count
    is divided by ``len(self.lookup)``.

    Args:
        candidate_skills: iterable of candidate skills; only the
            ``source_object`` attribute of each item is read.
        sample_len: not used by this method.

    Returns:
        The ratio as a float, or ``0.0`` (after logging a warning) when
        ``self.lookup`` is empty.
    """
    num_total_occupations = len(self.lookup)
    if num_total_occupations == 0:
        logging.warning(
            'Lookup has zero terms, cannot evaluate. Returning 0')
        # Return a float so the result type matches the annotation.
        return 0.0

    # NOTE(review): found occupations are not checked against self.lookup,
    # so the ratio could exceed 1 if candidates carry occupations that are
    # absent from the lookup -- confirm this is intended.
    found_occupations = set()
    for candidate_skill in candidate_skills:
        occupation = get_onet_occupation(candidate_skill.source_object)
        if occupation:
            # set.add is idempotent; no prior membership test is needed.
            found_occupations.add(occupation)

    num_found_occupations = len(found_occupations)
    logging.info('Found %s occupations out of %s total', num_found_occupations, num_total_occupations)
    return num_found_occupations / num_total_occupations
def transformer(self):
    """Return a callable that encodes a job posting's occupation.

    The returned function takes a job posting, obtains its occupation via
    ``get_onet_occupation`` and passes it, wrapped in a one-element list,
    to ``self.encoder.transform``.
    """
    def encode_occupation(job_posting):
        occupation = get_onet_occupation(job_posting)
        return self.encoder.transform([occupation])

    return encode_occupation
def extract_occupation_from_jobposting(self, job_posting):
    """Pair a job posting's occupation with the posting's id.

    Args:
        job_posting: mapping-like job posting; its ``'id'`` key is read and
            the whole object is passed to ``get_onet_occupation``.

    Returns:
        A 2-tuple ``(occupation, posting_id)``.
    """
    occupation = get_onet_occupation(job_posting)
    posting_id = job_posting['id']
    return occupation, posting_id