Python CandidateSkillFactory 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tests.utils

hotexamples.com에서의 예제들: 12

Python CandidateSkillFactory - 12개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python tests.utils.CandidateSkillFactory의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

CandidateSkillFactory(8)

create_batch(4)

자주 사용되는 메소드들

CandidateSkillFactory (8)

create_batch (4)

예제 #1

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_EvaluationSetPrecision():
    """Exercise EvaluationSetPrecision in strict and non-strict modes.

    Checks the generated metric names and the scores obtained when the
    evaluated skills' start indices either agree with, or are shifted by
    one from, the gold standard skills.
    """

    def build_skills(count, index_shift=0):
        # Skill n belongs to document str(n), is named str(n) and starts
        # at n + index_shift.
        return [
            CandidateSkillFactory(document_id=str(n),
                                  skill_name=str(n),
                                  start_index=n + index_shift)
            for n in range(count)
        ]

    # The 25 gold standard skills are a subset of the 40 being evaluated.
    gold_standard = build_skills(25)
    evaluated = build_skills(40)
    expected_precision = 25 / 40

    # With identical indices every gold standard skill matches in both modes.
    strict = EvaluationSetPrecision(gold_standard, 'test', strict=True)
    assert strict.name == 'test_evaluation_set_precision_strict'
    assert strict.eval(evaluated, 40) == expected_precision

    nonstrict = EvaluationSetPrecision(gold_standard, 'test', strict=False)
    assert nonstrict.name == 'test_evaluation_set_precision_nonstrict'
    assert nonstrict.eval(evaluated, 40) == expected_precision

    # Same documents and names, but every start index is off by one:
    # strict mode rejects all matches, non-strict mode is unaffected.
    shifted = build_skills(40, index_shift=1)
    assert strict.eval(shifted, 40) == 0 / 40
    assert nonstrict.eval(shifted, 40) == expected_precision

예제 #2

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_PercentageNoSkillDocument():
    """PercentageNoSkillDocuments yields 5/30 for 25 skilled documents out of 30."""
    skills = [
        CandidateSkillFactory(document_id=str(doc_number))
        for doc_number in range(25)
    ]
    # Skills exist for 25 distinct document ids while the sample is declared
    # to hold 30 documents, leaving 5 documents without any skill.
    metric = PercentageNoSkillDocuments()
    assert metric.eval(skills, 30) == 5/30

예제 #3

파일 보기

def test_ontology_from_candidate_skills():
    """Build an ontology from 25 distinctly named skills and check its shape.

    The extractor name 'tester' is expected to show up in the ontology name,
    the competency framework name and the framework description.
    """
    skills = [
        CandidateSkillFactory(skill_name=f'skill_{i}')
        for i in range(25)
    ]
    built = ontology_from_candidate_skills(
        skills,
        skill_extractor_name='tester',
    )

    assert built.name == 'candidate_skill_tester'
    framework = built.competency_framework
    assert framework.name == 'candidate_skill_tester'
    assert 'tester' in built.competency_framework.description

    # One competency per distinct skill name; a single occupation is expected
    # when the skills are created with the factory's default source object.
    assert len(built.competencies) == 25
    assert len(built.occupations) == 1

예제 #4

파일 보기

def test_ontology_from_candidate_skills_occupations():
    """Skills spread over five distinct SOC codes should yield five occupations."""
    skills = []
    for i in range(25):
        # i % 5 cycles through 0..4, so only five different codes are produced.
        source = {'onet_soc_code': f'11-101{i%5}.00'}
        skills.append(
            CandidateSkillFactory(skill_name=f'skill_{i}',
                                  source_object=source))

    built = ontology_from_candidate_skills(skills)
    assert len(built.competencies) == 25
    assert len(built.occupations) == 5

예제 #5

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_OntologyCompetencyRecall():
    """Fifty skills matching a single competency give a recall of 1/len(competencies)."""
    ontology = sample_ontology()
    metric = OntologyCompetencyRecall(ontology)

    # Every generated skill points at the same competency, using the
    # lower-cased form of its identifier.
    first_competency = list(ontology.competencies)[0]
    matched_identifier = first_competency.identifier.lower()
    skills = CandidateSkillFactory.create_batch(
        50, matched_skill_identifier=matched_identifier)

    expected_recall = 1/len(ontology.competencies)
    assert metric.eval(skills, 50) == expected_recall

예제 #6

파일 보기

def test_metrics_for_candidate_skills_nostore():
    """Without output storage, the result contains one entry per requested metric."""
    skills = CandidateSkillFactory.create_batch(50)
    sample = standard_sample()
    requested = [TotalOccurrences(), TotalVocabularySize()]

    computed = metrics_for_candidate_skills(candidate_skills=skills,
                                            sample=sample,
                                            metrics=requested)

    assert len(computed) == 2
    # Results are looked up by each metric class's name attribute.
    for metric_class in (TotalOccurrences, TotalVocabularySize):
        assert metric_class.name in computed

예제 #7

파일 보기

def test_metrics_for_candidate_skills_withstore():
    """With output storage, the returned metrics are also persisted as JSON."""
    store = InMemoryStore()
    skills = CandidateSkillFactory.create_batch(50)
    sample = standard_sample()
    requested = [TotalOccurrences(), TotalVocabularySize()]

    computed = metrics_for_candidate_skills(candidate_skills=skills,
                                            sample=sample,
                                            metrics=requested,
                                            output_storage=store)

    # The stored document must decode to exactly what the call returned.
    persisted = json.loads(store.load('mysample/metrics.json'))
    assert persisted == computed

예제 #8

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_OntologyOccupationRecall():
    """Skills that all reference one occupation give a recall of 1/len(occupations)."""
    ontology = sample_ontology()
    metric = OntologyOccupationRecall(ontology)

    def source_for_first_occupation():
        # Fresh source object per skill, always naming the first occupation.
        return {
            'occupationalCategory': list(ontology.occupations)[0].identifier
        }

    skills = [
        CandidateSkillFactory(document_id=str(doc_number),
                              source_object=source_for_first_occupation())
        for doc_number in range(25)
    ]

    expected_recall = 1 / len(ontology.occupations)
    assert metric.eval(skills, 50) == expected_recall

예제 #9

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_SkillsPerDocumentHistogram():
    """Check the skills-per-document histogram, including documents with no skills.

    Eleven documents (ids '1' through '11') are generated, where document k
    carries exactly k candidate skills. The metric is told the sample held
    13 documents, so the two documents that never appear count as having
    zero skills.
    """
    candidate_skills = []
    for document_id in range(1, 12):
        # Document k contributes k skills; the inner counter itself is unused.
        candidate_skills.extend(
            CandidateSkillFactory(document_id=str(document_id))
            for _ in range(document_id))

    # SkillsPerDocumentHistogram(5) is expected to return five counts here.
    assert SkillsPerDocumentHistogram(5).eval(candidate_skills,
                                              13) == [4, 2, 2, 2, 3]

예제 #10

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_MedianSkillsPerDocument():
    """The median must account for sampled documents that have no skills."""
    # Integer-dividing 0..24 by 10 yields ten skills for document '0',
    # ten for document '1' and five for document '2'. Looking only at
    # those three documents, the median would be 10.
    skills = [
        CandidateSkillFactory(document_id=str(n // 10))
        for n in range(25)
    ]

    # The sample is declared to hold 5 documents, so the per-document counts
    # are [10, 10, 5, 0, 0] and the median drops to 5.
    metric = MedianSkillsPerDocument()
    assert metric.eval(skills, sample_len=5) == 5

예제 #11

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_TotalOccurrences():
    """Fifty candidate skills evaluated against a sample of 100 give a total of 50."""
    skills = CandidateSkillFactory.create_batch(50)
    metric = TotalOccurrences()
    total = metric.eval(skills, 100)
    assert total == 50

예제 #12

파일 보기

파일: test_skill_extraction_metrics.py 프로젝트: AYCHDeveloper/DevDataSkillsTest

def test_TotalVocabularySize():
    """One hundred skills drawn from ten distinct names give a vocabulary size of 10."""
    # i % 10 cycles through 0..9, so only ten different names are generated.
    skill_names = ['skill_' + str(i % 10) for i in range(100)]
    skills = [CandidateSkillFactory(skill_name=name) for name in skill_names]

    metric = TotalVocabularySize()
    assert metric.eval(skills, 100) == 10