Esempio n. 1
0
def get_target_for_candidate(candidate):
    """
    Map a candidate's state onto a binary target.

    Only candidates whose decision has been made explicitly get a label;
    ambiguous states (such as Backlog, Did Interview etc.) are left unlabeled.

    :param candidate: object exposing a ``state`` attribute
    :return: 1 when the state is in State.get_recommended_states(),
        0 when it is in State.get_rejected_states(), None otherwise
    """
    state = candidate.state
    if state in State.get_recommended_states():
        return 1
    if state in State.get_rejected_states():
        return 0
    # Neither recommended nor rejected: no label for this candidate.
    return None
Esempio n. 2
0
def user_has_been_recommended(user):
    """
    Tell whether the user has at least one candidate in a recommended state.

    :param user: user whose candidates are inspected
    :return: result of ``exists()`` on the candidates of this user whose
        state code is listed in State.get_recommended_state_codes()
    """
    recommended_codes = State.get_recommended_state_codes()
    return Candidate.objects.filter(
        user=user, state__code__in=recommended_codes).exists()
Esempio n. 3
0
def get_relevant_candidates(campaign):
    """
    Return the campaign's relevant, non-removed candidates, best ranked first.

    :param campaign: campaign whose candidates are listed
    :return: list of candidates sorted by descending ranking value
    """

    def ranking_value(candidate):
        # Candidates without a summary, or with a falsy final score
        # (None or 0), all share the lowest rank.
        summary = candidate.evaluation_summary
        if not (summary and summary.final_score):
            return -1
        # Candidates in state 'STC' get a 100 point bonus on top of the score.
        return summary.final_score + (candidate.state.code == 'STC') * 100

    relevant = Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_relevant_state_codes(),
        removed=False)
    return sorted(relevant, key=ranking_value, reverse=True)
Esempio n. 4
0
def calculate_operational_efficiency(campaign):
    """
    Compute and store the campaign's operational efficiency.

    Important KPI answering how difficult it is to find a good candidate in
    the late stages of the process. This percentage should be as high as
    possible, otherwise too much time is spent on operation (interviews,
    messages, etc.)

    The value is recommended / (recommended + rejected by a human), or None
    when both counts are zero. It is written to
    ``campaign.operational_efficiency`` and the campaign is saved.

    :param campaign: campaign to update
    :return: None
    """
    recommended = get_recommended_candidates_count(campaign)
    rejected_by_human = Candidate.objects.filter(
        campaign=campaign,
        state__in=State.get_rejected_by_human_states(),
    ).count()
    decided = recommended + rejected_by_human

    # No decided candidates at all: the ratio is undefined, store None.
    campaign.operational_efficiency = (
        recommended / decided if decided != 0 else None)
    campaign.save()
Esempio n. 5
0
def get_users_from_tests(campaign):
    """
    An additional source of candidates are the ones who both pass the simple
    filter conditions and pass all the tests that the campaign is asking in a
    cognitive and technical aspect.

    A candidate qualifies when it has a last evaluation and every campaign
    test whose type code is 'C' or 'T' appears among the passed scores of
    that evaluation. Candidates already in a recommended state are excluded.

    :param campaign: Campaign
    :return: list with the user of each qualifying candidate (one entry per
        candidate, so a user may appear more than once)
    """
    campaign_tests = [
        t for t in campaign.tests.all() if t.type.code in ['C', 'T']
    ]

    # implicit implementation of the simple filter.
    # The exclusion is a lookup on state *codes*, so it must receive the
    # recommended codes, not State objects.
    candidates = Candidate.objects.filter(
        ~Q(state__code__in=State.get_recommended_state_codes()),
        user__work_area=campaign.work_area,
        user__city=campaign.city,
        user__salary__gte=campaign.get_very_low_salary(),
        user__salary__lte=campaign.get_very_high_salary())

    # TODO: missing complementary tests from the same user in different candidates
    # example:
    # campaign_tests = [1, 2, 3]
    # candidate1.tests = [1, 2]
    # candidate2.tests = [3]
    # True = candidate1.user == candidate2.user

    users = []
    for candidate in candidates:
        last_evaluation = candidate.get_last_evaluation()
        if not last_evaluation:
            continue
        passing_tests = [
            score.test for score in last_evaluation.scores.all()
            if score.passed
        ]
        # With no campaign tests this is vacuously true.
        if all(t in passing_tests for t in campaign_tests):
            users.append(candidate.user)

    return users
Esempio n. 6
0
def update_importance(test):
    """
    Updates the importance and other statistics for each question of the test.

    Builds a candidates x questions table for candidates in a rejected or
    recommended state, re-balances it, trains a grid-searched random forest
    NUMBER_OF_TRIALS times (one seed per trial) and stores on every question:
    ``importance`` (mean feature importance over the trials),
    ``valid_answer_count`` (number of rows after re-balancing) and
    ``difficulty`` (1 - mean of the question column). Diagnostics are printed.

    Returns early, without saving anything, when the test has fewer than
    MIN_NUMBER_OF_CANDIDATES candidates or when re-balancing fails.

    :param test: obj
    :return: None
    """
    questions, candidates = get_questions_and_candidates(test)

    # only process worthwhile tests
    if len(candidates) < MIN_NUMBER_OF_CANDIDATES:
        return

    df = pd.DataFrame(columns=[q.id for q in questions] + ['passed'],
                      index=[c.id for c in candidates])

    # NOTE(review): under Python 3 this prints the bytes repr (b'...');
    # left untouched so the emitted output does not change.
    print('processing test: {}'.format(test).encode('utf-8'))

    # The state lists do not depend on the candidate: fetch them once.
    recommended_states = State.get_recommended_states()
    end_states = State.get_rejected_states() + recommended_states

    for candidate, values in candidates.items():
        # Only candidates with an explicit final decision are labeled.
        if candidate.state not in end_states:
            continue

        df.loc[candidate.id, 'passed'] = (
            1 if candidate.state in recommended_states else 0)
        for question, passed in values.items():
            df.loc[candidate.id, question.id] = passed

    # Rows of undecided candidates (or with missing answers) are still NaN.
    df.dropna(axis=0, inplace=True)

    data = learn.DataPair(target=df['passed'])
    df.drop('passed', axis=1, inplace=True)
    data.features = df

    data = new_balance(data)
    if data is None:
        print("couldn't do the re-balance will exit.")
        return

    # TODO: measure trivial solution after rebalancing, i.e. the accuracy of
    # the rule "number of passed questions reaches the test cut score"
    # against the end state of each decided candidate.

    # One dict of {question id: importance} per trial; the frame is built
    # once after the loop (DataFrame.append no longer exists in pandas 2).
    importance_rows = []
    cross_val_scores_array = []
    for seed in range(NUMBER_OF_TRIALS):

        X_train, X_test, y_train, y_test = train_test_split(data.features,
                                                            list(data.target),
                                                            test_size=0.3,
                                                            random_state=seed)

        grid_model = GridSearchCV(RandomForestClassifier(random_state=seed),
                                  PARAMS,
                                  n_jobs=1)
        grid_model.fit(X_train, y_train)

        print('optimal params: ' + str(grid_model.best_params_))

        model = RandomForestClassifier(
            max_depth=grid_model.best_params_['max_depth'],
            n_estimators=grid_model.best_params_['n_estimators'],
            random_state=seed)
        model.fit(X_train, y_train)

        c = statistics.mean(
            float(e) for e in cross_val_score(
                model, X_test, y_test, scoring='accuracy'))
        cross_val_scores_array.append(c)

        importance_rows.append(
            dict(zip(list(X_train), model.feature_importances_)))

    importance = pd.DataFrame(importance_rows, columns=list(df))

    mean_importance = importance.mean()
    for q in questions:
        q.importance = mean_importance.loc[q.id]
        q.valid_answer_count = len(data.target)
        q.difficulty = 1 - statistics.mean(data.features[q.id])
        q.save()

    print('importance avg: ' + str(mean_importance))
    print('importance std: ' + str(importance.std()))

    # The confusion matrix uses the model and split of the last trial only.
    print('Confusion Matrix:')
    test_prediction = model.predict(X_test)
    print(confusion_matrix(y_test, test_prediction))

    # TODO: missing trivial accuracy (see TODO above)
    print('trivial accuracy: TODO')

    cross_val = statistics.mean(cross_val_scores_array)
    print('delta (cross_val - trivial): TODO')
    print('avg cross val: ' + str(cross_val))
    print('std cross val: ' + str(statistics.stdev(cross_val_scores_array)))
Esempio n. 7
0
def get_filtered_candidates():
    """
    Very good candidates contrasted with very bad ones.

    :return: queryset of candidates outside the default campaign whose state
        is either a recommended or a rejected one
    """
    decided_states = (
        State.get_recommended_states() + State.get_rejected_states())
    outside_default_campaign = Candidate.objects.exclude(
        campaign_id__in=[constants.DEFAULT_CAMPAIGN_ID])
    return outside_default_campaign.filter(state__in=decided_states)
Esempio n. 8
0
def get_recommended_candidates_count(campaign):
    """
    Count the campaign's non-removed candidates in a recommended state.

    :param campaign: campaign whose candidates are counted
    :return: number of matching candidates
    """
    recommended_codes = State.get_recommended_state_codes()
    recommended = Candidate.objects.filter(
        campaign=campaign,
        state__code__in=recommended_codes,
        removed=False,
    )
    return recommended.count()
Esempio n. 9
0
def get_application_candidates_count(campaign):
    """
    Count the campaign's non-removed candidates in an applicant state.

    :param campaign: campaign whose candidates are counted
    :return: number of matching candidates
    """
    applicant_codes = State.get_applicant_state_codes()
    applicants = Candidate.objects.filter(
        campaign=campaign,
        state__code__in=applicant_codes,
        removed=False,
    )
    return applicants.count()
Esempio n. 10
0
def get_rejected_candidates(campaign):
    """
    Return the campaign's non-removed rejected candidates, best score first.

    :param campaign: campaign whose candidates are listed
    :return: queryset ordered by descending final score of the evaluation
        summary, with candidates lacking a score placed last
    """
    rejected = Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_rejected_state_codes(),
        removed=False)
    best_score_first = F('evaluation_summary__final_score').desc(
        nulls_last=True)
    return rejected.order_by(best_score_first)