Example #1
0
def test_stratified_splitter(test_specs, python_dataset):
    """Exercise ``python_stratified_split`` with a single ratio and a ratio list.

    The fixture ``test_specs`` supplies ``ratio`` (one float), ``ratios``
    (indexed here at positions 0-2), ``number_of_rows`` and ``tolerance``.
    Both calls use ``min_rating=10`` and ``filter_by="user"``.

    Checks performed:
      * each split's share of ``number_of_rows`` matches the requested ratio
        (``tolerance`` is handed to ``pytest.approx`` as its second
        positional argument);
      * every split keeps the same set of columns as the input dataset;
      * for the two-way split, both parts contain the same set of users.
    """
    n_rows = test_specs["number_of_rows"]
    rel_tol = test_specs["tolerance"]

    # --- Case 1: a single ratio produces two parts -------------------------
    ratio = test_specs["ratio"]
    two_way = python_stratified_split(
        python_dataset, ratio=ratio, min_rating=10, filter_by="user"
    )

    for idx, expected_share in enumerate((ratio, 1 - ratio)):
        assert len(two_way[idx]) / n_rows == pytest.approx(expected_share, rel_tol)

    for part in two_way:
        assert set(part.columns) == set(python_dataset.columns)

    # The two parts of the stratified split are expected to cover exactly
    # the same users.
    train_users = set(two_way[0][DEFAULT_USER_COL].unique())
    test_users = set(two_way[1][DEFAULT_USER_COL].unique())
    assert train_users == test_users

    # --- Case 2: a list of ratios produces three parts ---------------------
    ratios = test_specs["ratios"]
    multi_way = python_stratified_split(
        python_dataset, ratio=ratios, min_rating=10, filter_by="user"
    )

    assert len(multi_way) == 3
    for idx in range(3):
        assert len(multi_way[idx]) / n_rows == pytest.approx(ratios[idx], rel_tol)

    for part in multi_way:
        assert set(part.columns) == set(python_dataset.columns)
def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
    """Smoke-test a LightGCN model built from ``lightgcn.yaml`` with one epoch.

    Loads MovieLens 100k, splits it 75/25 with ``python_stratified_split``,
    then checks that evaluation and top-k recommendation return something
    and that ``infer_embedding`` leaves two non-empty CSV files under the
    ``dkn`` folder of ``deeprec_resource_path``.
    """
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")
    output_dir = os.path.join(deeprec_resource_path, "dkn")
    user_embedding_path = os.path.join(output_dir, "user_embeddings.csv")
    item_embedding_path = os.path.join(output_dir, "item_embeddings.csv")

    ratings = movielens.load_pandas_df(size="100k")
    train_df, test_df = python_stratified_split(ratings, ratio=0.75)

    dataset = ImplicitCF(train=train_df, test=test_df)
    params = prepare_hparams(config_path, epochs=1)
    model = LightGCN(params, dataset)

    # Evaluation is checked before any training has happened.
    assert model.run_eval() is not None

    model.fit()
    assert model.recommend_k_items(test_df) is not None

    model.infer_embedding(user_embedding_path, item_embedding_path)
    for embedding_path in (user_embedding_path, item_embedding_path):
        assert os.path.getsize(embedding_path) != 0
def test_lightgcn_component_definition(deeprec_config_path):
    """Verify that a freshly constructed LightGCN exposes its components.

    Builds the model from ``lightgcn.yaml`` with ``embed_size=64`` on a
    75/25 stratified split of MovieLens 100k, without training it, and
    checks that the expected attributes are set and that the user/item
    embedding shapes equal ``[n_users, 64]`` and ``[n_items, 64]``.
    """
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")

    ratings = movielens.load_pandas_df(size="100k")
    train_df, test_df = python_stratified_split(ratings, ratio=0.75)
    dataset = ImplicitCF(train=train_df, test=test_df)

    embed_size = 64
    params = prepare_hparams(config_path, embed_size=embed_size)
    model = LightGCN(params, dataset)

    assert model.norm_adj is not None

    # Embedding shapes: one row per user / item, ``embed_size`` columns.
    for attr_name, row_count_name in (
        ("ua_embeddings", "n_users"),
        ("ia_embeddings", "n_items"),
    ):
        expected_shape = [getattr(dataset, row_count_name), embed_size]
        assert getattr(model, attr_name).shape == expected_shape

    # Remaining components only need to exist.
    for attr_name in (
        "u_g_embeddings",
        "pos_i_g_embeddings",
        "neg_i_g_embeddings",
        "batch_ratings",
        "loss",
        "opt",
    ):
        assert getattr(model, attr_name) is not None
Example #4
0
    seed = args.seed

    logger.debug(f"Received parameters:")
    logger.debug(f"Ratio:    {ratio}")
    logger.debug(f"User:    {col_user}")
    logger.debug(f"Item:    {col_item}")
    logger.debug(f"Seed:    {seed}")

    logger.debug(f"Input path: {args.input_path}")
    logger.debug(f"Shape of loaded DataFrame: {input_df.shape}")
    logger.debug(f"Cols of DataFrame: {input_df.columns}")

    output_train, output_test = python_stratified_split(
        input_df,
        ratio=args.ratio,
        col_user=args.col_user,
        col_item=args.col_item,
        seed=args.seed,
    )

    logger.debug(f"Output path: {args.output_train}")
    logger.debug(f"Output path: {args.output_test}")

    save_data_frame_to_directory(
        args.output_train,
        output_train,
        schema=DataFrameSchema.data_frame_to_dict(output_train),
    )
    save_data_frame_to_directory(
        args.output_test,
        output_test,