Example #1
0
def main():
    """Run the content-based recommender for one client and log its scores.

    Loads the application/opportunity doc2vec CSVs and the CV CSV for the
    hard-coded client id, asks the reader for a random-candidate split,
    builds a ``ContentBasedClassifier``, requests recommendations, and then
    evaluates them twice: once with ``onlyInterviewOffer=False`` and once
    with ``onlyInterviewOffer=True``.
    """
    client_id = '422'

    app_csv = 'data/app_' + client_id + '_doc2vec.csv'
    opp_csv = 'data/opp_' + client_id + '_doc2vec.csv'
    datareader = DataReader.CSVReader(app_csv, opp_csv)
    cv_filename = 'data/cv_' + client_id + '.csv'

    split = datareader.getTrainNTestData_RandomCandidate(
        number_of_test_opp=30,
        number_of_test_candidate=30,
        seed=3,
    )
    train_app_df, gold_app_df, opp_df = split

    cv_df = pd.read_csv(cv_filename)

    # The gold frame keeps the 'answer' columns; the classifier must only
    # see the frame with those columns removed.
    answer_columns = ['opp_id', 'app_state', 'app_id', 'app_source_json']
    test_app_df = gold_app_df.drop(answer_columns, axis=1)

    model = ContentBasedClassifier(train_app_df, test_app_df, opp_df, cv_df)
    recommendations, _ = model.getRecommendationResult(30)

    log.info('Computing score on the recommendation.')
    evaluate = Evaluator.RecommenderEvaluator()
    # First pass scores against all applications, second pass restricts the
    # evaluation via onlyInterviewOffer=True.
    score_frames = [
        evaluate.computeResult(recommendations,
                               gold_app_df,
                               onlyInterviewOffer=interview_offer_only)
        for interview_offer_only in (False, True)
    ]

    log.info('========= Overall Candidate Score =========')
    for score_df in score_frames:
        evaluate.printResult(score_df)

    log.info('Client ID: %s' % client_id)
Example #2
0
def __getTestData(app_filename, opp_filename, test_mode, seed):
    """Load the CSV data and split it according to ``test_mode``.

    Args:
        app_filename: Path of the application CSV handed to ``dr.CSVReader``.
        opp_filename: Path of the opportunity CSV handed to ``dr.CSVReader``.
        test_mode: One of the ``Test_Mode`` members ``Random_Candidate``,
            ``Warm_Candidate``, ``Cold_Candidate`` or ``Cold_Opportunity``;
            selects which ``getTrainNTestData_*`` reader method is used.
        seed: Seed forwarded unchanged to the selected reader method.

    Returns:
        The tuple ``(train_app_df, gold_app_df, opp_df)`` produced by the
        selected reader method.

    Raises:
        AssertionError: If ``test_mode`` matches none of the supported modes.
    """
    datareader = dr.CSVReader(app_filename, opp_filename)

    # Every split uses the same test-set sizes and seed; only the warm
    # candidate split takes extra arguments.
    common_args = dict(number_of_test_opp=500,
                       number_of_test_candidate=50,
                       seed=seed)

    if test_mode == Test_Mode.Random_Candidate:
        split = datareader.getTrainNTestData_RandomCandidate(**common_args)
    elif test_mode == Test_Mode.Warm_Candidate:
        split = datareader.getTrainNTestData_WarmCandidate(min_application=2,
                                                           application_train=1,
                                                           **common_args)
    elif test_mode == Test_Mode.Cold_Candidate:
        split = datareader.getTrainNTestData_ColdCandidate(**common_args)
    elif test_mode == Test_Mode.Cold_Opportunity:
        split = datareader.getTrainNTestData_ColdOpportunity(**common_args)
    else:
        log.error('Invalid mode selected.')
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let execution fall through
        # to the return and fail with an unrelated UnboundLocalError.
        raise AssertionError('Invalid mode selected: %r' % (test_mode,))

    train_app_df, gold_app_df, opp_df = split
    return train_app_df, gold_app_df, opp_df