def main():
    """Run the content-based recommender for one client and log its scores.

    Loads the application/opportunity doc2vec CSVs and the CV CSV for the
    hard-coded client, splits the applications with the reader's
    random-candidate strategy (30 test opportunities, 30 test candidates,
    seed 3), asks ``ContentBasedClassifier`` for 30 recommendations and
    evaluates them twice: once over all applications and once with
    ``onlyInterviewOffer=True``.
    """
    client_id = '422'

    app_path = 'data/app_' + client_id + '_doc2vec.csv'
    opp_path = 'data/opp_' + client_id + '_doc2vec.csv'
    reader = DataReader.CSVReader(app_path, opp_path)
    cv_path = 'data/cv_' + client_id + '.csv'

    train_apps, gold_apps, opportunities = (
        reader.getTrainNTestData_RandomCandidate(
            number_of_test_opp=30,
            number_of_test_candidate=30,
            seed=3,
        )
    )
    cv_frame = pd.read_csv(cv_path)

    # Drop the 'answer' columns so the model only sees the test inputs.
    answer_columns = ['opp_id', 'app_state', 'app_id', 'app_source_json']
    test_apps = gold_apps.drop(answer_columns, axis=1)

    recommender = ContentBasedClassifier(
        train_apps, test_apps, opportunities, cv_frame)
    # The second element of the returned pair is not used here.
    recommendations, _ = recommender.getRecommendationResult(30)

    log.info('Computing score on the recommendation.')
    evaluator = Evaluator.RecommenderEvaluator()
    # Score over all applications first, then only interview/offer ones;
    # both scores are computed before anything is printed.
    score_frames = [
        evaluator.computeResult(
            recommendations, gold_apps, onlyInterviewOffer=only_interview_offer)
        for only_interview_offer in (False, True)
    ]

    log.info('========= Overall Candidate Score =========')
    for score_frame in score_frames:
        evaluator.printResult(score_frame)
    log.info('Client ID: %s' % client_id)
def __getTestData(app_filename, opp_filename, test_mode, seed):
    """Split the application data into train/gold sets for ``test_mode``.

    Args:
        app_filename: First argument forwarded to ``dr.CSVReader``.
        opp_filename: Second argument forwarded to ``dr.CSVReader``.
        test_mode: The ``Test_Mode`` member selecting the split strategy
            (``Random_Candidate``, ``Warm_Candidate``, ``Cold_Candidate``
            or ``Cold_Opportunity``).
        seed: Seed forwarded to the reader's split method.

    Returns:
        The ``(train_app_df, gold_app_df, opp_df)`` tuple produced by the
        selected reader method.

    Raises:
        AssertionError: If ``test_mode`` is not one of the handled modes.
    """
    datareader = dr.CSVReader(app_filename, opp_filename)

    # Arguments shared by every split strategy.
    split_kwargs = dict(
        number_of_test_opp=500,
        number_of_test_candidate=50,
        seed=seed,
    )

    if test_mode == Test_Mode.Random_Candidate:
        train_app_df, gold_app_df, opp_df = (
            datareader.getTrainNTestData_RandomCandidate(**split_kwargs))
    elif test_mode == Test_Mode.Warm_Candidate:
        # The warm-candidate split takes two extra application-count knobs.
        train_app_df, gold_app_df, opp_df = (
            datareader.getTrainNTestData_WarmCandidate(
                min_application=2, application_train=1, **split_kwargs))
    elif test_mode == Test_Mode.Cold_Candidate:
        train_app_df, gold_app_df, opp_df = (
            datareader.getTrainNTestData_ColdCandidate(**split_kwargs))
    elif test_mode == Test_Mode.Cold_Opportunity:
        train_app_df, gold_app_df, opp_df = (
            datareader.getTrainNTestData_ColdOpportunity(**split_kwargs))
    else:
        log.error('Invalid mode selected.')
        # Raise explicitly instead of `assert(False)`: assert statements are
        # removed under `python -O`, which would let execution fall through
        # to the return below and fail with an UnboundLocalError. The
        # exception type is kept as AssertionError for existing callers.
        raise AssertionError('Invalid mode selected.')

    return train_app_df, gold_app_df, opp_df