if __name__ == "__main__":
    FEATURE_SIZE = 105
    NUM_INTERACTION = 2000000
    click_models = ["informational", "navigational", "perfect"]
    # click_models = ["perfect"]
    Learning_rate = 0.1
    num_groups = 4
    dataset_path = "datasets/clueweb09_intent_change.txt"
    intent_path = "intents"
    output_fold = "results/SDBN/PDGD/group_mixed_2m"
    train_set = LetorDataset(dataset_path,
                             FEATURE_SIZE,
                             query_level_norm=True,
                             binary_label=True)
    intent_paths = [
        "{}/1.txt".format(intent_path),
        "{}/2.txt".format(intent_path),
        "{}/3.txt".format(intent_path),
        "{}/4.txt".format(intent_path)
    ]
    for click_model in click_models:
        mp.Process(target=job,
                   args=(click_model, Learning_rate, NUM_INTERACTION, 1,
                         train_set, intent_paths, output_fold,
                         num_groups)).start()
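# A minimal stand-in for the `job` worker these scripts spawn (the real
# implementation lives elsewhere in each file and runs the full simulation).
# The stub body, its name, and the argument values below are hypothetical;
# the point is only the per-click-model process fan-out/join pattern.
import multiprocessing as mp

def job_stub(click_model, learning_rate, num_interaction, run_id,
             train_set, intent_paths, output_fold, num_groups):
    # placeholder for: build a ranker, simulate num_interaction query
    # sessions under click_model, pickle results below output_fold
    print("would run", num_interaction, "interactions with", click_model)

if __name__ == "__main__":
    workers = [mp.Process(target=job_stub,
                          args=(cm, 0.1, 1000, 1, None, [], "results", 4))
               for cm in ["informational", "navigational", "perfect"]]
    for w in workers:
        w.start()
    for w in workers:
        w.join()  # unlike the scripts above, block until all runs finish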
    with open(
            "../results/reduction/mq2007/PDGD/fold{}/{}_ranker{}_run{}_cndcg.txt"
            .format(f, model_type, 2, r), "wb") as fp:
        pickle.dump(cndcg_scores2, fp)
    with open(
            "../results/reduction/mq2007/PDGD/fold{}/{}_ranker{}_run{}_final_weight.txt"
            .format(f, model_type, 2, r), "wb") as fp:
        pickle.dump(final_weight2, fp)
    print("PDGD tau{} fold{} {} run{} finished!".format(tau, f, model_type, r))


if __name__ == "__main__":
    FEATURE_SIZE = 46
    NUM_INTERACTION = 10000
    # click_models = ["informational", "navigational", "perfect"]
    click_model = "informational"
    Learning_rate = 0.1
    dataset_fold = "../datasets/2007_mq_dataset"
    output_fold = "mq2007"
    # taus = [0.1, 0.5, 1.0, 5.0, 10.0]
    tau = 1
    # for 5 folds
    for f in range(1, 6):
        training_path = "{}/Fold{}/train.txt".format(dataset_fold, f)
        test_path = "{}/Fold{}/test.txt".format(dataset_fold, f)
        train_set = LetorDataset(training_path, FEATURE_SIZE)
        test_set = LetorDataset(test_path, FEATURE_SIZE)
        mp.Process(target=job,
                   args=(click_model, f, train_set, test_set, tau)).start()
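# The tau swept above is presumably a softmax temperature for PDGD's
# Plackett-Luce document sampling; assuming that role, here is a
# self-contained sketch of temperature-controlled sampling without
# replacement. `sample_ranking` is an illustrative name, not the repo's API.
import numpy as np

def sample_ranking(scores, k, tau=1.0, rng=np.random.default_rng(0)):
    # sample k documents without replacement from softmax(scores / tau);
    # larger tau flattens the distribution (more exploration)
    scores = np.asarray(scores, dtype=float) / tau
    remaining = list(range(len(scores)))
    ranking = []
    for _ in range(min(k, len(remaining))):
        logits = scores[remaining] - scores[remaining].max()
        probs = np.exp(logits) / np.exp(logits).sum()
        pick = rng.choice(len(remaining), p=probs)
        ranking.append(remaining.pop(pick))
    return ranking

print(sample_ranking([2.0, 1.0, 0.5, 0.1], k=3, tau=1.0))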
    if click_model == "perfect":
        pc = [0.0, 0.2, 0.4, 0.8, 1.0]
        ps = [0.0, 0.0, 0.0, 0.0, 0.0]
    elif click_model == "navigational":
        pc = [0.05, 0.3, 0.5, 0.7, 0.95]
        ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    elif click_model == "informational":
        pc = [0.4, 0.6, 0.7, 0.8, 0.9]
        ps = [0.1, 0.2, 0.3, 0.4, 0.5]

    for f in fold_range:
        training_path = "{}/Fold{}/train.txt".format(dataset_fold, f)
        test_path = "{}/Fold{}/test.txt".format(dataset_fold, f)
        train_set = LetorDataset(training_path,
                                 FEATURE_SIZE,
                                 query_level_norm=norm,
                                 cache_root="../datasets/cache")
        test_set = LetorDataset(test_path,
                                FEATURE_SIZE,
                                query_level_norm=norm,
                                cache_root="../datasets/cache")
        print(dataset_fold, click_model, f, batch_size)
        p = mp.Process(target=job,
                       args=(click_model, f, train_set, test_set,
                             output_fold, batch_size, pc, ps))
        p.start()
        processors.append(p)
    for p in processors:
        p.join()
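# How pc/ps drive the click simulation: under the (S)DBN-style user models
# used here, pc[g] is the click probability for a document of relevance
# grade g and ps[g] the probability of stopping after such a click. A
# minimal sketch of one simulated session, assuming a top-down cascade scan:
import numpy as np

def simulate_sdbn_session(relevances, pc, ps, rng=np.random.default_rng(0)):
    # cascade scan from the top: click with probability pc[grade]; after
    # a click, abandon the session with probability ps[grade]
    clicks = np.zeros(len(relevances), dtype=int)
    for rank, grade in enumerate(relevances):
        if rng.random() < pc[grade]:
            clicks[rank] = 1
            if rng.random() < ps[grade]:
                break
    return clicks

pc = [0.4, 0.6, 0.7, 0.8, 0.9]  # informational click probabilities per grade
ps = [0.1, 0.2, 0.3, 0.4, 0.5]  # informational stop probabilities per grade
print(simulate_sdbn_session([4, 0, 2, 1, 0], pc, ps))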
            if cm.name == "Mixed":
                line += s_name + " "
            line += "\n"
            f.write(line)
            # if index % 10000 == 0:
            #     print("write %d/%d queries" % (index, num_queries))
    f.close()
    print(cm.name, "unseen_set finished!")


# %%
if __name__ == "__main__":
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.train.txt"
    print("loading training set.......")
    train_set = LetorDataset(train_path, 700)
    # %%
    # pc = [0.4, 0.6, 0.7, 0.8, 0.9]
    # ps = [0.1, 0.2, 0.3, 0.4, 0.5]
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    # click_models = [DCTR(pc), SDBN(pc, ps), UBM(pc), SDBN_reverse(pc, ps)]
    # Mixed_model = Mixed(click_models)
    # simulators = [DCTR(pc), SDBN(pc, ps), UBM(pc), SDBN_reverse(pc, ps), Mixed_model]
    simulators = [SDBN_reverse(pc, ps)]
    for id in range(2, 16):
        pool = []
        for cm in simulators:
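# The `cm.name == "Mixed"` branch above also records which sub-model
# produced each simulated session (`s_name`). A hypothetical sketch of such
# a mixed simulator — the repo's actual Mixed class may differ:
import random

class AlwaysClickTop:
    # toy sub-model: clicks only the top result
    def simulate(self, relevances):
        return [1] + [0] * (len(relevances) - 1)

class MixedSketch:
    name = "Mixed"

    def __init__(self, sub_models):
        self.sub_models = sub_models

    def simulate(self, relevances):
        # delegate each session to one randomly chosen sub-model and
        # report its name (the `s_name` appended per line above)
        sub = random.choice(self.sub_models)
        return sub.simulate(relevances), type(sub).__name__

clicks, s_name = MixedSketch([AlwaysClickTop()]).simulate([2, 0, 1])
print(clicks, s_name)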
    NUM_INTERACTION = 100000
    click_models = ["informational", "perfect"]
    # click_models = ["perfect"]
    # dataset_fold = "../datasets/MSLR10K"
    # dataset_fold = "../datasets/2007_mq_dataset"
    output_fold = "results/istella/DBGD"
    # output_fold = "results/mslr10k/DBGD"
    # output_fold = "results/yahoo/PMGD"
    # taus = [0.1, 0.5, 1.0, 5.0, 10.0]
    alpha = 0.01
    delta = 1
    num_rankers = 1
    # istella ships a single train/test split, so only one "fold"
    for f in range(1, 2):
        # training_path = "{}/Fold{}/train.txt".format(dataset_fold, f)
        # test_path = "{}/Fold{}/test.txt".format(dataset_fold, f)
        training_path = "../datasets/istella/train.txt"
        test_path = "../datasets/istella/test.txt"
        print("loading dataset.....")
        # training_path = "../datasets/ltrc_yahoo/set1.train.txt"
        # test_path = "../datasets/ltrc_yahoo/set1.test.txt"
        train_set = LetorDataset(training_path,
                                 FEATURE_SIZE,
                                 query_level_norm=True)
        test_set = LetorDataset(test_path,
                                FEATURE_SIZE,
                                query_level_norm=True)
        # one training process per click model
        for click_model in click_models:
            p = mp.Process(target=job,
                           args=(click_model, f, train_set, test_set, delta,
                                 alpha, FEATURE_SIZE, num_rankers,
                                 output_fold))
            p.start()
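# For reference, the role of delta and alpha in DBGD: delta scales a random
# unit-vector perturbation that defines the candidate ranker, and alpha is
# the step taken when an interleaved comparison prefers the candidate. A
# minimal sketch with a stubbed comparison outcome (`candidate_wins` stands
# in for the interleaving result and is not the repo's API):
import numpy as np

def dbgd_step(w, delta, alpha, candidate_wins, rng=np.random.default_rng(0)):
    # propose a candidate ranker at distance delta along a random unit
    # direction; if interleaving prefers it, step the weights by alpha
    u = rng.standard_normal(w.shape)
    u /= np.linalg.norm(u)
    w_candidate = w + delta * u  # would be compared via interleaved results
    if candidate_wins(w_candidate):
        w = w + alpha * u
    return w

w = np.zeros(5)
w = dbgd_step(w, delta=1.0, alpha=0.01, candidate_wins=lambda cand: True)
print(w)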
    Learning_rate = 0.1
    dataset_fold = "../datasets/2007_mq_dataset"
    output_fold = "mq2007"
    tau = 1.0
    # for 5 folds
    # for f in range(1, 6):
    #     training_path = "{}/Fold{}/train.txt".format(dataset_fold, f)
    #     test_path = "{}/Fold{}/test.txt".format(dataset_fold, f)
    #     train_set = LetorDataset(training_path, FEATURE_SIZE)
    #     test_set = LetorDataset(test_path, FEATURE_SIZE)
    #
    #     # for 3 click_models
    #     for click_model in click_models:
    #         mp.Process(target=job, args=(click_model, f, train_set, test_set, tau)).start()

    training_path = "{}/Fold{}/train.txt".format(dataset_fold, 1)
    train_set = LetorDataset(training_path, FEATURE_SIZE)

    # hash each query's mean feature vector into a 2-bit LSH code,
    # assigning it to one of four ranker groups
    lsh = LSHash(2, FEATURE_SIZE, num_hashtables=2)
    ranker1 = []
    ranker2 = []
    ranker3 = []
    ranker4 = []
    print(len(train_set.get_all_querys()))  # number of training queries
    for q in train_set.get_all_querys():
        query = np.mean(train_set.get_all_features_by_query(q), axis=0)
        code = lsh._hash(lsh.uniform_planes[0], query)
        print(code)
        print(lsh._hash(lsh.uniform_planes[1], query))
        print()
        if code == '00':
            ranker1.append(q)
        elif code == '01':
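# What lsh._hash(...) computes, in spirit: one bit per random hyperplane,
# set when the vector falls on its positive side, so 2 planes yield the
# four codes '00'..'11' used to split queries into four ranker groups. A
# self-contained re-implementation sketch (not LSHash's actual code):
import numpy as np

def lsh_code(planes, x):
    # one bit per hyperplane: 1 if x lies on the positive side, else 0
    return "".join("1" if np.dot(p, x) > 0 else "0" for p in planes)

rng = np.random.default_rng(0)
planes = rng.standard_normal((2, 46))  # 2 planes -> codes '00'..'11'
query_vec = rng.standard_normal(46)
print(lsh_code(planes, query_vec))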
    os.makedirs("{}/fold{}".format(output_fold, f),
                exist_ok=True)  # create directory if it does not exist
    with open(
            "{}/fold{}/{}_run{}_ndcg.txt".format(output_fold, f, model_type,
                                                 r), "wb") as fp:
        pickle.dump(ndcg_scores, fp)
    with open(
            "{}/fold{}/{}_run{}_cndcg.txt".format(output_fold, f, model_type,
                                                  r), "wb") as fp:
        pickle.dump(cndcg_scores, fp)
    with open(
            "{}/fold{}/{}_run{}_weights.txt".format(output_fold, f,
                                                    model_type, r),
            "wb") as fp:
        pickle.dump(final_weights, fp)
    print("PDGD fold{} {} run{} finished!".format(f, model_type, r))


if __name__ == "__main__":
    # click_models = ["informational", "navigational", "perfect"]
    click_models = ["perfect", "informational"]
    dataset_fold = "../datasets/MSLR10k"
    output_fold = "../results/exploration/PDGD/MSLR10K/random"
    # MSLR-10K has 5 folds; only fold 1 is run here
    for f in range(1, 2):
        training_path = "{}/Fold{}/train.txt".format(dataset_fold, f)
        test_path = "{}/Fold{}/test.txt".format(dataset_fold, f)
        train_set = LetorDataset(training_path,
                                 FEATURE_SIZE,
                                 query_level_norm=True,
                                 cache_root="../datasets/cache",
                                 binary_label=3)
        test_set = LetorDataset(test_path,
                                FEATURE_SIZE,
                                query_level_norm=True,
                                cache_root="../datasets/cache",
                                binary_label=3)
        for click_model in click_models:
            mp.Process(target=job,
                       args=(click_model, f, train_set, test_set,
                             output_fold)).start()
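# Reading the pickled results back, e.g. for plotting. The fold, click
# model, and run id in the path are placeholders that mirror the dump
# pattern above; the file must exist for this to run.
import pickle

path = "../results/exploration/PDGD/MSLR10K/random/fold1/perfect_run1_ndcg.txt"
with open(path, "rb") as fp:
    ndcg_scores = pickle.load(fp)
print(len(ndcg_scores), "evaluation points; final nDCG:", ndcg_scores[-1])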