Ejemplo n.º 1
0
def test_sampler():
    """Check that ``WarpSampler.next_batch`` honours the requested batch size.

    The ``reviews_Electronics_5`` reviews file is fetched with
    ``download_and_extract``, run through ``_reviews_preprocessing`` and
    ``data_process_with_time``, loaded into a ``SASRecDataSet`` and split.
    A single batch is then drawn from a ``WarpSampler`` and each of its four
    components (users, sequences, positives, negatives) must contain exactly
    ``batch_size`` entries.
    """
    batch_size = 8
    maxlen = 50
    data_dir = os.path.join("tests", "resources", "deeprec", "sasrec")
    dataset = "reviews_Electronics_5"
    reviews_name = dataset + ".json"
    outfile = os.path.join(data_dir, dataset + ".txt")

    reviews_file = os.path.join(data_dir, reviews_name)
    download_and_extract(reviews_name, reviews_file)
    reviews_output = _reviews_preprocessing(reviews_file)
    _, _ = data_process_with_time(reviews_output, outfile, K=10, sep="\t")

    # initiate a dataset class
    data = SASRecDataSet(filename=outfile, col_sep="\t")

    # create train, validation and test splits
    data.split()

    sampler = WarpSampler(
        data.user_train,
        data.usernum,
        data.itemnum,
        batch_size=batch_size,
        maxlen=maxlen,
        n_workers=3,
    )
    try:
        u, seq, pos, neg = sampler.next_batch()
    finally:
        # The sampler was started with n_workers=3; release those workers
        # whether or not fetching the batch succeeded, so the test does not
        # leave them running in the background.
        sampler.close()

    assert len(u) == batch_size
    assert len(seq) == batch_size
    assert len(pos) == batch_size
    assert len(neg) == batch_size
Ejemplo n.º 2
0
def test_model_sum(deeprec_resource_path, deeprec_config_path):
    """Smoke-test ``SUMModel``: evaluate, fit and predict on the slirec data.

    If the training file is not yet present under
    ``<deeprec_resource_path>/slirec``, the Movies-and-TV reviews and meta
    files are fetched with ``download_and_extract`` and passed to
    ``data_preprocessing`` first. The model is then built from ``sum.yaml``
    and its ``run_eval``, ``fit`` and ``predict`` results are checked.

    Args:
        deeprec_resource_path: Directory that holds the ``slirec`` data folder.
        deeprec_config_path: Directory that holds ``sum.yaml``.
    """
    # Number of negative instances that accompany one positive instance.
    train_num_ngs = 4  # training
    valid_num_ngs = 4  # validation
    test_num_ngs = 9  # testing

    data_path = os.path.join(deeprec_resource_path, "slirec")
    yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
    train_file, valid_file, test_file = (
        os.path.join(data_path, split_name)
        for split_name in ("train_data", "valid_data", "test_data")
    )
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)
        vocab_files = [
            os.path.join(data_path, vocab_name)
            for vocab_name in (
                "user_vocab.pkl",
                "item_vocab.pkl",
                "category_vocab.pkl",
            )
        ]

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            *vocab_files,
            # sample a small item set for training and testing here for example
            sample_rate=0.005,
            valid_num_ngs=valid_num_ngs,
            test_num_ngs=test_num_ngs
        )

    hparams = prepare_hparams(
        yaml_file,
        learning_rate=0.01,
        epochs=1,
        train_num_ngs=train_num_ngs,
    )
    assert hparams is not None

    model = SUMModel(hparams, SequentialIterator)
    assert model.run_eval(valid_file, num_ngs=valid_num_ngs) is not None
    assert isinstance(
        model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs),
        BaseModel,
    )
    assert model.predict(valid_file, output_file) is not None
Ejemplo n.º 3
0
def test_Sequential_Iterator(deeprec_resource_path, deeprec_config_path):
    """Check that ``SequentialIterator`` yields dicts from the training file.

    If the training file is not yet present under
    ``<deeprec_resource_path>/slirec``, the Movies-and-TV reviews and meta
    files are fetched with ``download_and_extract`` and passed to
    ``data_preprocessing`` first. An iterator configured from ``sli_rec.yaml``
    then reads the training file and every item it yields must be a ``dict``.

    Args:
        deeprec_resource_path: Directory that holds the ``slirec`` data folder.
        deeprec_config_path: Directory that holds ``sli_rec.yaml``.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")
    yaml_file = os.path.join(deeprec_config_path, "sli_rec.yaml")

    def in_data_dir(name):
        # Resolve a file name inside the slirec data folder.
        return os.path.join(data_path, name)

    train_file = in_data_dir("train_data")

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = in_data_dir(reviews_name)
        meta_file = in_data_dir(meta_name)

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            in_data_dir("valid_data"),
            in_data_dir("test_data"),
            in_data_dir("user_vocab.pkl"),
            in_data_dir("item_vocab.pkl"),
            in_data_dir("category_vocab.pkl"),
            # sample a small item set for training and testing here for example
            sample_rate=0.01,
            # number of negative instances with a positive instance for validation
            valid_num_ngs=4,
            # number of negative instances with a positive instance for testing
            test_num_ngs=9,
        )

    hparams = prepare_hparams(yaml_file)
    graph = tf.Graph()
    iterator = SequentialIterator(hparams, graph)
    assert iterator is not None
    for batch in iterator.load_data_from_file(train_file):
        assert isinstance(batch, dict)
Ejemplo n.º 4
0
def sequential_files(deeprec_resource_path):
    """Prepare the slirec data files and return the data folder and vocab paths.

    The Movies-and-TV reviews and meta files are fetched with
    ``download_and_extract`` and then handed to ``data_preprocessing`` together
    with the target paths for the train/valid/test splits and the three
    vocabulary files.

    Args:
        deeprec_resource_path: Directory that holds the ``slirec`` data folder.

    Returns:
        tuple: ``(data_path, user_vocab, item_vocab, cate_vocab)`` where
        ``data_path`` is the ``slirec`` folder and the rest are the paths of
        the user, item and category vocabulary files inside it.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")

    def located(name):
        # Resolve a file name inside the slirec data folder.
        return os.path.join(data_path, name)

    reviews_name = "reviews_Movies_and_TV_5.json"
    meta_name = "meta_Movies_and_TV.json"
    reviews_file = located(reviews_name)
    meta_file = located(meta_name)

    user_vocab = located("user_vocab.pkl")
    item_vocab = located("item_vocab.pkl")
    cate_vocab = located("category_vocab.pkl")

    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        reviews_file,
        meta_file,
        located("train_data"),
        located("valid_data"),
        located("test_data"),
        user_vocab,
        item_vocab,
        cate_vocab,
        # sample a small item set for training and testing here for example
        sample_rate=0.01,
        # number of negative instances with a positive instance for validation
        valid_num_ngs=4,
        # number of negative instances with a positive instance for testing
        test_num_ngs=9,
    )

    return data_path, user_vocab, item_vocab, cate_vocab
Ejemplo n.º 5
0
def test_prepare_data():
    """Check that splitting the SASRec data set yields non-empty splits.

    The ``reviews_Electronics_5`` reviews file is fetched with
    ``download_and_extract``, run through ``_reviews_preprocessing`` and
    ``data_process_with_time``, and loaded into a ``SASRecDataSet``. After
    ``split()``, the train, validation and test user collections must each
    contain at least one entry.
    """
    dataset = "reviews_Electronics_5"
    reviews_name = dataset + ".json"

    # Both files live side by side and differ only in their extension.
    path_stem = os.path.join("tests", "resources", "deeprec", "sasrec", dataset)
    reviews_file = path_stem + ".json"
    outfile = path_stem + ".txt"

    download_and_extract(reviews_name, reviews_file)
    _, _ = data_process_with_time(
        _reviews_preprocessing(reviews_file), outfile, K=10, sep="\t"
    )

    # Build the data set from the processed file, then create the
    # train/validation/test splits.
    data = SASRecDataSet(filename=outfile, col_sep="\t")
    data.split()

    for split_attr in ("user_train", "user_valid", "user_test"):
        assert len(getattr(data, split_attr)) > 0