from seo import letor_fold_creator_min_max_normalize as lfc
from model_running import cross_validator as cv

# Build min-max-normalized LETOR folds, then run 5-fold cross-validation
# with an SVM ranker against the "qrels" relevance file.
if __name__ == "__main__":
    data_set_location = "/lv_local/home/sgregory/letor"
    new_data_set_location = "/lv_local/home/sgregory/letor_fixed1"
    l = lfc.letor_folds_creator(data_set_location, new_data_set_location, True)
    c = cv.cross_validator(5, l, "LTOR1")
    c.k_fold_cross_validation("SVM", "qrels")
import sys

from seo import query_to_fold as qtf
from seo import letor_fold_creator as lfc
from model_running import cross_validator as cv

# Map each query to its fold, rebuild the LETOR folds, then run 5-fold
# cross-validation with LambdaMART against the given qrels file.
# Usage: <script> <data_set_location> <new_data_set_location> <qrel_path>
if __name__ == "__main__":
    data_set_location = sys.argv[1]
    print(data_set_location)
    new_data_set_location = sys.argv[2]
    qrel_path = sys.argv[3]
    print(qrel_path)
    q = qtf.qtf(data_set_location)
    q.create_query_to_fold_index()
    l = lfc.letor_folds_creator(data_set_location, new_data_set_location, True)
    c = cv.cross_validator(5, l, "LTOR_MART")
    c.k_fold_cross_validation("LAMBDAMART", qrel_path)
# NOTE: this script was missing its imports. The paths below are assumed from
# the sibling scripts (qtf / lfc / cv) or are plain guesses for the
# project-internal modules (ri / d / srfh); adjust to the actual layout.
from functools import reduce
from multiprocessing import Pool as p  # assumed: used as p(3) below

from seo import query_to_fold as qtf
from seo import letor_fold_creator as lfc
from seo import relevance_index as ri
from model_running import cross_validator as cv
from model_running import lambda_mart_stats_handler as d
from model_running import winner_reference_point_random as srfh


def average_list(list_a, iterations):
    # Divide every element of the list by the number of iterations.
    return [float(a) / iterations for a in list_a]


def sum_dicts(x, y):
    # Key-wise sum of two dicts; missing keys count as 0.
    return {k: x.get(k, 0) + y.get(k, 0) for k in set(x) | set(y)}


def average_dict(dict_a, iterations):
    # Divide every value of the dict by the number of iterations.
    return {k: float(dict_a.get(k, 0)) / iterations for k in set(dict_a)}


if __name__ == "__main__":
    data_set_location = "/lv_local/home/sgregory/letor_fixed1"
    q = qtf.qtf(data_set_location)
    q.create_query_to_fold_index()
    l = lfc.letor_folds_creator_z_normalize(data_set_location, data_set_location, True)
    c = cv.cross_validator(5, l, "LTOR_MART_min_max")
    lbda_score_file = "/lv_local/home/sgregory/LTOR_MART_min_max/test_scores_trec_format/LAMBDAMART/final_score_combined.txt"
    svm_score_file = "/lv_local/home/sgregory/LTOR1/test_scores_trec_format/SVM/final_score_combined.txt"
    rel_index = ri.relevance_index("qrels")
    rel_index.create_relevance_index()
    pool = p(3)
    # Stats handlers for three budget levels (10%, 5%, 1%), for LambdaMART
    # and for the SVM reference-point baseline.
    gg = d.lambda_mart_stats_handler("01", 0.1, c)
    aa = d.lambda_mart_stats_handler("005", 0.05, c)
    bb = d.lambda_mart_stats_handler("001", 0.01, c)
    svm_gg = srfh.winner_reference_point_random("01", 0.1)
    svm_aa = srfh.winner_reference_point_random("005", 0.05)
    svm_bb = srfh.winner_reference_point_random("001", 0.01)
    lbda_chosen_models = gg.recover_models_per_fold(
        "/lv_local/home/sgregory/LTOR_MART_min_max/models/LAMBDAMART",
        "/lv_local/home/sgregory/LTOR_MART_min_max/test_scores_trec_format/LAMBDAMART/")
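# ---------------------------------------------------------------------------
# A minimal sketch of how the helpers above fit together: per-fold metric
# dicts are summed key-wise with sum_dicts, then divided by the fold count
# with average_dict. It assumes sum_dicts / average_dict are in scope (same
# file or imported); the fold values are illustrative, not project output.
# ---------------------------------------------------------------------------
from functools import reduce

fold_metrics = [
    {"ndcg": 0.41, "map": 0.30},
    {"ndcg": 0.45, "map": 0.28},
    {"ndcg": 0.43},  # a fold missing a metric simply contributes 0 to the sum
]
totals = reduce(sum_dicts, fold_metrics)              # {"ndcg": 1.29, "map": 0.58}
per_fold_average = average_dict(totals, len(fold_metrics))
print(per_fold_average)                               # {"ndcg": 0.43, "map": 0.1933...}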
import os
import sys

from model_running import cross_validator as cv

# Generic runner: validate the command-line arguments, then run 5-fold
# cross-validation with the chosen model.
# Usage: <script> <LAMBDAMART|SVM> <features_file> <qrels_file> <data_set_name>
if __name__ == "__main__":
    model = sys.argv[1]  # user's choice of model
    if model not in ("LAMBDAMART", "SVM"):
        print("please insert correct model to run")
        sys.exit(1)
    features_file = sys.argv[2]
    if not os.path.exists(features_file):
        print("please insert correct path to train file")
        sys.exit(1)
    query_relevance_file = sys.argv[3]
    if not os.path.exists(query_relevance_file):
        print("please insert correct path to relevance file")
        sys.exit(1)
    data_set = sys.argv[4]
    # TODO: maybe add user-input params for more generality
    cross_validator = cv.cross_validator(5, features_file, data_set, 200)
    cross_validator.k_fold_cross_validation(model, query_relevance_file)