def get_target_for_candidate(candidate):
    """
    Contrasts very good candidates with very bad ones, where a decision has
    already been made explicitly. Ambiguous states (such as Backlog,
    Did Interview, etc.) are excluded.
    :param candidate: Candidate
    :return: 1 = very good match, 0 = bad match, np.nan = unknown
    """
    if candidate.state in State.get_recommended_states():
        return 1
    elif candidate.state in State.get_rejected_states():
        return 0
    # ambiguous state: no explicit decision was made
    return np.nan
def user_has_been_recommended(user):
    """
    Returns a boolean indicating whether the user already has a candidacy
    in a recommended state (i.e., has effectively been matched to a job).
    """
    candidates = Candidate.objects.filter(
        user=user,
        state__code__in=State.get_recommended_state_codes())
    return candidates.exists()
def get_relevant_candidates(campaign):
    candidates = Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_relevant_state_codes(),
        removed=False)
    # Sort by final score, descending. Candidates in state 'STC' get a +100
    # boost so they always rank first; candidates with no evaluation summary
    # (or a falsy final score) sink to the bottom with a key of -1.
    return sorted(
        candidates,
        key=lambda c: (c.evaluation_summary.final_score +
                       (c.state.code == 'STC') * 100)
        if c.evaluation_summary and c.evaluation_summary.final_score
        else -1,
        reverse=True)
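# A quick illustration of the ordering above (not part of the original
# module, and 'XXX' is a made-up stand-in for any non-STC state code): an
# 'STC' candidate with score 60 outranks a non-STC candidate with score 90,
# because the boost lifts its sort key to 160.
from types import SimpleNamespace as NS

stc_candidate = NS(state=NS(code='STC'),
                   evaluation_summary=NS(final_score=60))
other_candidate = NS(state=NS(code='XXX'),
                     evaluation_summary=NS(final_score=90))
# sorted with the key above: stc_candidate (60 + 100 = 160) comes first,
# ahead of other_candidate (90).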
def calculate_operational_efficiency(campaign):
    """
    Important KPI answering how difficult it is to find a good candidate in
    the late stages of the process. This percentage should be as high as
    possible; otherwise too much time is spent on operations (interviews,
    messages, etc.).
    """
    recommended_count = get_recommended_candidates_count(campaign)
    not_that_good_count = Candidate.objects.filter(
        campaign=campaign,
        state__in=State.get_rejected_by_human_states()).count()
    total = recommended_count + not_that_good_count
    if total != 0:
        campaign.operational_efficiency = recommended_count / total
    else:
        campaign.operational_efficiency = None
    campaign.save()
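# A worked example of the KPI (illustrative numbers, not from real data):
# a campaign with 8 recommended candidates and 12 rejected by a human gives
#     operational_efficiency = 8 / (8 + 12) = 0.4
# i.e. only 40% of the candidates that reached human review in the late
# stages turned out to be good matches.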
def get_users_from_tests(campaign):
    """
    An additional source of candidates: users who both pass the simple
    filter conditions and pass all the cognitive ('C') and technical ('T')
    tests that the campaign asks for.
    :param campaign: Campaign
    :return: users
    """
    campaign_tests = [
        t for t in campaign.tests.all() if t.type.code in ['C', 'T']
    ]

    # Implicit implementation of the simple filter. The filter is on state
    # codes, so the exclusion must use state codes as well.
    candidates = Candidate.objects.filter(
        ~Q(state__code__in=State.get_recommended_state_codes()),
        user__work_area=campaign.work_area,
        user__city=campaign.city,
        user__salary__gte=campaign.get_very_low_salary(),
        user__salary__lte=campaign.get_very_high_salary())

    # TODO: missing complementary tests from the same user in different candidates
    # example:
    # campaign_tests = [1, 2, 3]
    # candidate1.tests = [1, 2]
    # candidate2.tests = [3]
    # True = candidate1.user == candidate2.user
    # (a possible user-level merge is sketched after this function)

    prospects = []
    for c in candidates:
        last_evaluation = c.get_last_evaluation()
        if last_evaluation:
            passing_tests = [
                s.test for s in last_evaluation.scores.all() if s.passed
            ]
            if all(t in passing_tests for t in campaign_tests):
                prospects.append(c)

    return [c.user for c in prospects]
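# A minimal sketch of the user-level merge described in the TODO above (not
# part of the original module): union the passed tests of all candidacies
# belonging to the same user, so complementary tests across candidates count
# together. Assumes the same Candidate/evaluation API used in
# get_users_from_tests; the function name is hypothetical.
def get_users_from_tests_merged(campaign, candidates):
    campaign_tests = {
        t for t in campaign.tests.all() if t.type.code in ['C', 'T']
    }
    passed_by_user = {}  # user -> set of tests passed across candidacies
    for c in candidates:
        last_evaluation = c.get_last_evaluation()
        if last_evaluation:
            passed_by_user.setdefault(c.user, set()).update(
                s.test for s in last_evaluation.scores.all() if s.passed)
    return [
        user for user, passed in passed_by_user.items()
        if campaign_tests <= passed  # user covers every campaign test
    ]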
def update_importance(test):
    """
    Updates the importance and other statistics of each question of the test.
    :param test: Test
    :return: None
    """
    questions, candidates = get_questions_and_candidates(test)

    # only process worthwhile tests
    if len(candidates) < MIN_NUMBER_OF_CANDIDATES:
        return

    print('processing test: {}'.format(test))

    # One row per candidate: question columns hold pass/fail per question,
    # 'passed' holds the end-state label (1 = recommended, 0 = rejected).
    df = pd.DataFrame(columns=[q.id for q in questions] + ['passed'],
                      index=[c.id for c in candidates.keys()])
    for candidate, values in candidates.items():
        if candidate.state in (State.get_rejected_states() +
                               State.get_recommended_states()):
            end_state_passes = (1 if candidate.state in
                                State.get_recommended_states() else 0)
            df.loc[candidate.id, 'passed'] = end_state_passes
            for question, passed in values.items():
                df.loc[candidate.id, question.id] = passed

    # Candidates with no explicit end state keep NaN rows and are dropped.
    df.dropna(axis=0, inplace=True)

    data = learn.DataPair(target=df['passed'])
    df.drop('passed', axis=1, inplace=True)
    data.features = df

    data = new_balance(data)
    if data is None:
        print("couldn't do the re-balance, will exit.")
        return

    # TODO: measure the trivial solution (predict "passed" iff the share of
    # correct answers reaches test.cut_score percent) after rebalancing, and
    # compare its accuracy with the cross-validation accuracy below.
    # (a sketch of this measurement follows the function)

    importance_rows = []
    cross_val_scores_array = []
    for seed in range(NUMBER_OF_TRIALS):
        X_train, X_test, y_train, y_test = train_test_split(
            data.features, list(data.target), test_size=0.3,
            random_state=seed)

        grid_model = GridSearchCV(RandomForestClassifier(random_state=seed),
                                  PARAMS, n_jobs=1)
        grid_model.fit(X_train, y_train)
        print('optimal params: ' + str(grid_model.best_params_))

        model = RandomForestClassifier(
            max_depth=grid_model.best_params_['max_depth'],
            n_estimators=grid_model.best_params_['n_estimators'],
            random_state=seed)
        model.fit(X_train, y_train)

        c = statistics.mean(
            float(e) for e in cross_val_score(
                model, X_test, y_test, scoring='accuracy'))
        cross_val_scores_array.append(c)

        # record this trial's feature importances, one column per question
        p = model.feature_importances_
        importance_rows.append(
            {col: value for col, value in zip(list(X_train), p)})

    importance = pd.DataFrame(importance_rows, columns=list(df))
    mean_importance = importance.mean()
    for q in questions:
        q.importance = mean_importance.loc[q.id]
        q.valid_answer_count = len(data.target)
        q.difficulty = 1 - statistics.mean(data.features[q.id])
        q.save()

    print('importance avg: ' + str(mean_importance))
    print('importance std: ' + str(importance.std()))

    # confusion matrix of the last trial's model on its held-out split
    print('Confusion Matrix:')
    test_prediction = model.predict(X_test)
    print(confusion_matrix(y_test, test_prediction))

    print('trivial accuracy: TODO')
    cross_val = statistics.mean(cross_val_scores_array)
    print('delta (cross_val - trivial): TODO')
    print('avg cross val: ' + str(cross_val))
    print('std cross val: ' + str(statistics.stdev(cross_val_scores_array)))
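# A minimal sketch of the trivial-accuracy measurement flagged in the TODO
# above (not part of the original module): after rebalancing, predict
# "passed" whenever the share of correct answers reaches the test's cut
# score, then compare against the true labels. Assumes data.features holds
# one 0/1 column per question and data.target the end-state labels; the
# function name is hypothetical.
def trivial_accuracy(data, test):
    n_questions = data.features.shape[1]
    predictions = [
        int(row.sum() >= test.cut_score / 100 * n_questions)
        for _, row in data.features.iterrows()
    ]
    hits = sum(int(p == t) for p, t in zip(predictions, list(data.target)))
    return hits / len(predictions)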
def get_filtered_candidates():
    """Very good candidates contrasted with very bad ones."""
    return Candidate.objects\
        .exclude(campaign_id__in=[constants.DEFAULT_CAMPAIGN_ID])\
        .filter(state__in=State.get_recommended_states() +
                State.get_rejected_states())
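# A minimal usage sketch (not part of the original module) combining
# get_filtered_candidates() with get_target_for_candidate() above to build
# a labeled set of unambiguous training examples. Assumes pandas is
# available as pd; the function name is hypothetical.
def build_training_targets():
    return pd.Series({c.id: get_target_for_candidate(c)
                      for c in get_filtered_candidates()},
                     name='target')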
def get_recommended_candidates_count(campaign):
    return Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_recommended_state_codes(),
        removed=False).count()
def get_application_candidates_count(campaign):
    return Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_applicant_state_codes(),
        removed=False).count()
def get_rejected_candidates(campaign):
    return Candidate.objects.filter(
        campaign=campaign,
        state__code__in=State.get_rejected_state_codes(),
        removed=False).order_by(
            F('evaluation_summary__final_score').desc(nulls_last=True))