Beispiel #1
0
from dataset import LetorDataset
import numpy as np
from clickModel.LSTMv2 import LSTMv2
from utils import read_file as rf
from clickModel.DCTR import DCTR

# Script: train an LSTMv2 click model on a click log and report its MSE
# against a DCTR simulator on a random sample of the held-out click log.

# Parameter lists handed to the simulator. `ps` is defined but not used here.
# Presumably per-relevance-grade click / stop probabilities -- TODO confirm
# against the clickModel classes.
pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]

# NOTE(review): the path points at test_set.txt although the variable name and
# the log message say "training" -- confirm this is intended.
train_path = "../datasets/ltrc_yahoo/test_set.txt"
print("loading training set.......")
train_set = LetorDataset(train_path, 700)

# Click log used for fitting, followed by the one used for evaluation.
click_log_path = "../datasets/ltrc_yahoo/test_click_log.txt"
test_click_log_path = "../datasets/ltrc_yahoo/test_click_log_test.txt"
click_log = rf.read_click_log(click_log_path)
test_click_log = rf.read_click_log(test_click_log_path)

simulator = DCTR(pc)
print(click_log.shape)
print(test_click_log.shape)

click_model = LSTMv2(700, 1024, train_set)
click_model.train(click_log)

# Evaluate on 100 row indices drawn at random from the test click log
# (np.random.choice samples with replacement by default).
sample_rows = np.random.choice(test_click_log.shape[0], 100)
sampled_sessions = test_click_log[sample_rows]
sampled_mse = click_model.get_MSE(sampled_sessions, train_set, simulator)
print(sampled_mse)
Beispiel #2
0
if __name__ == "__main__":
    # Generate click datasets for every simulator and every set index
    # (1..15) in parallel, one worker process per (simulator, index) pair.
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.train.txt"
    test_path = "../datasets/ltrc_yahoo/set1.test.txt"
    print("loading training set.......")
    train_set = LetorDataset(train_path, 700)
    print("loading testing set.......")
    test_set = LetorDataset(test_path, 700)
    # %%
    # Alternative parameter set kept for reference:
    # pc = [0.4, 0.6, 0.7, 0.8, 0.9]
    # ps = [0.1, 0.2, 0.3, 0.4, 0.5]
    # Presumably per-relevance-grade click / stop probabilities -- TODO
    # confirm against the clickModel classes.
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]

    workers = []
    # `set_id` instead of `id` so the builtin is not shadowed.
    for set_id in range(1, 16):
        # Fresh simulator instances per set index, each paired with the
        # directory its generated data is written under.
        simulator_outputs = [
            (DCTR(pc), "../feature_click_datasets/DCTR/"),
            (CM(pc), "../feature_click_datasets/CM/"),
            (SDBN(pc, ps), "../feature_click_datasets/SDBN/"),
            (SDCM(pc), "../feature_click_datasets/SDCM/"),
        ]
        batch = [
            mp.Process(
                target=generate_dataset,
                args=(train_set, test_set, simulator, output_dir, set_id),
            )
            for simulator, output_dir in simulator_outputs
        ]
        # Start the whole batch only after all four processes are built,
        # then move on without waiting so every set index runs concurrently.
        for worker in batch:
            worker.start()
        workers.extend(batch)

    # Wait explicitly for every worker so the parent does not finish (and
    # failures do not go unnoticed) while generation is still running.
    for worker in workers:
        worker.join()
Beispiel #3
0

# %%
if __name__ == "__main__":
    # Load the training dataset, then read the generated click logs and
    # query frequencies for each simulator's dataset directory.
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.train.txt"
    test_path = "../datasets/ltrc_yahoo/set1.test.txt"
    print("loading training set.......")
    train_set = LetorDataset(train_path, 700)
    # %%
    # The test split is currently not loaded:
    # print("loading testing set.......")
    # test_set = LetorDataset(test_path, 700)
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    # (directory name, simulator instance) pairs.
    datasets_simulator = [
        ('SDBN', SDBN(pc, ps)),
        ('SDCM', SDCM(pc)),
        ('CM', CM(pc)),
        ('DCTR', DCTR(pc)),
    ]
    # datasets = ['CM']
    for dataset, simulator in datasets_simulator:
        # Only set index 1 is processed (range(1, 2)).
        for id in range(1, 2):
            path_fields = (dataset, id)
            click_log_path = (
                "../feature_click_datasets/{}/train_set{}.txt".format(*path_fields))
            test_click_log_path = (
                "../feature_click_datasets/{}/seen_set{}.txt".format(*path_fields))
            query_frequency_path = (
                "../feature_click_datasets/{}/query_frequency{}.txt".format(*path_fields))

            click_log = rf.read_click_log(click_log_path)
            test_click_log = rf.read_click_log(test_click_log_path)
            query_frequency = rf.read_query_frequency(query_frequency_path)

            # Fresh, default-constructed click models for this dataset.
            click_models = [
                SDBN(),
                SDCM(),
                CM(),
                DCTR(),
            ]
Beispiel #4
0
# %%
if __name__ == "__main__":
    # Load a pickled dataset, then read the train / seen click logs for each
    # enabled simulator and each set index in 2..15.
    # %%
    train_path = "../datasets/ltrc_yahoo/set1.LetorDataset.txt"
    print("loading training set.......")
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only load caches that this project produced itself.
    with open(train_path, "rb") as fp:
        train_set = pickle.load(fp)
    # %%
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]

    # (directory name, simulator instance) pairs; commented entries are
    # currently disabled.
    datasets_simulator = [
        ('SDBN', SDBN(pc, ps)),
        # ('SDCM', SDCM(pc)),
        # ('CM', CM(pc)),
        ('DCTR', DCTR(pc)),
        ('UBM', UBM(pc)),
        # ('SDBN_reverse', SDBN_reverse(pc, ps))
    ]

    progress = 0
    for dataset, simulator in datasets_simulator:
        for id in range(2, 16):
            # Build all three paths for this (dataset, id) pair up front.
            click_log_path, test_click_log_path, query_frequency_path = [
                template.format(dataset, id)
                for template in (
                    "../click_logs/{}/train_set{}.txt",
                    "../click_logs/{}/seen_set{}.txt",
                    "../click_logs/{}/query_frequency{}.txt",
                )
            ]
            click_log = rf.read_click_log(click_log_path)
            test_click_log = rf.read_click_log(test_click_log_path)
# Script: train an LSTMv2 click model from a TFRecord dataset generated by the
# "Mixed" simulator, printing the sampled MSE before and after training, then
# save the underlying model.
generator = "Mixed"

# Parameter lists for the simulators that make up the mixture.
pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]

click_log_path = "../feature_click_datasets/{}/train_set1.txt".format(generator)
test_click_log_path = "../feature_click_datasets/{}/seen_set1.txt".format(generator)
click_log = rf.read_click_log(click_log_path)
test_click_log = rf.read_click_log(test_click_log_path)

# Training data is streamed from the TFRecord file; the text click log read
# above is only used for the shape printout below.
tfrecord_path = '../feature_click_datasets/{}/train_set1.tfrecord'.format(generator)
dataset = tf.data.TFRecordDataset(filenames=tfrecord_path)
# test_dataset = tf.data.TFRecordDataset(filenames='../feature_click_datasets/SDBN/seen_set1.tfrecord')

Mixed_models = [
    DCTR(pc),
    CM(pc),
    SDBN(pc, ps),
    SDCM(pc),
    UBM(pc),
]
simulator = Mixed(Mixed_models)
print(click_log.shape)
print(test_click_log.shape)

# `train_set` is expected to already exist at module level at this point.
click_model = LSTMv2(700, 1024, train_set, batch_size=128, epoch=5)

# MSE of the untrained model on 1000 randomly drawn test-log rows.
rows_before = np.random.choice(test_click_log.shape[0], 1000)
mse_before = click_model.get_MSE(test_click_log[rows_before], train_set, simulator)
print(mse_before)

click_model.train(dataset)

# Same measurement after training, on a freshly drawn sample.
rows_after = np.random.choice(test_click_log.shape[0], 1000)
mse_after = click_model.get_MSE(test_click_log[rows_after], train_set, simulator)
print(mse_after)

model_path = "../click_model_results/LSTM_models/{}_train_set1.h5".format(generator)
click_model.model.save(model_path)



# test model
Beispiel #6
0
    for freq in frequencies:
        perplexities = click_model.get_perplexity(np.array(test_logs[freq]))
        MSEs = click_model.get_MSE(np.array(test_logs[freq]), dataset, simulator)

        perplexity_line = "Frequency " + freq + " perplexities:"
        MSEs_line = "Frequency " + freq + " MSE:"
        for perp in perplexities:
            perplexity_line += " " + str(perp)
        for MSE in MSEs:
            MSEs_line += " " + str(MSE)
        f.write(perplexity_line + "\n")
        f.write(MSEs_line + "\n")

    f.close()

if __name__ == "__main__":
    # Entry point: load the training dataset once and call `run` for every
    # simulator in every repetition.
    pc = [0.05, 0.3, 0.5, 0.7, 0.95]
    ps = [0.2, 0.3, 0.5, 0.7, 0.9]
    # Only referenced by the disabled simulator list below.
    Mixed_models = [
        DCTR(pc),
        SDBN(pc, ps),
        UBM(pc),
    ]
    # simulators = [SDBN(pc, ps), Mixed(Mixed_models), DCTR(pc), UBM(pc)]
    simulators = [
        SDBN(pc, ps),
        DCTR(pc),
        UBM(pc),
    ]

    dataset_path = "../datasets/ltrc_yahoo/set1.train.txt"
    print("loading training set.......")
    dataset = LetorDataset(dataset_path, 700)

    # A single repetition (r == 1) is run at the moment.
    repetitions = range(1, 2)
    for r in repetitions:
        for simulator in simulators:
            run(simulator, dataset, r)
from clickModel.RCTR import RCTR
from clickModel.Mixed import Mixed
from utils import read_file as rf
from utils import utility
from dataset import LetorDataset
# import matplotlib.pyplot as plt
import numpy as np
import multiprocessing as mp

# Script: train one RCTR click model on the training click logs of every
# listed simulator, for set indices 1..15.
train_path = "../datasets/ltrc_yahoo/set1.train.txt"
print("loading training set.......")
train_set = LetorDataset(train_path, 700)

pc = [0.05, 0.3, 0.5, 0.7, 0.95]
ps = [0.2, 0.3, 0.5, 0.7, 0.9]
# Defined but not referenced further in this script.
mixed_models = [
    DCTR(pc),
    SDBN(pc, ps),
    UBM(pc),
]
# (directory name, simulator instance) pairs; commented entries are disabled.
datasets_simulator = [
    ('SDBN', SDBN(pc, ps)),
    # ('SDCM', SDCM(pc)),
    # ('CM', CM(pc)),
    ('DCTR', DCTR(pc)),
    ('UBM', UBM(pc)),
    ('SDBN_reverse', SDBN_reverse(pc, ps)),
]

# NOTE(review): a single RCTR instance is trained on every log from every
# simulator in turn -- confirm this is intended rather than one model per
# dataset.
click_model = RCTR()

for dataset, simulator in datasets_simulator:
    for id in range(1, 16):
        path_fields = (dataset, id)
        click_log_path = "../click_logs/{}/train_set{}.txt".format(*path_fields)
        click_log = rf.read_click_log(click_log_path)
        click_model.train(click_log)