Ejemplo n.º 1
0
def test_fit(python_dataset_ncf, model_type):
    """Smoke test: fitting an NCF model for one epoch on the fixture data must not raise."""
    train_df, test_df = python_dataset_ncf
    dataset = Dataset(
        train=train_df, test=test_df, n_neg=N_NEG, n_neg_test=N_NEG_TEST
    )

    # A single epoch is enough here; the test makes no assertion on model quality.
    ncf = NCF(
        n_users=dataset.n_users,
        n_items=dataset.n_items,
        model_type=model_type,
        n_epochs=1,
    )
    ncf.fit(dataset)
Ejemplo n.º 2
0
def test_predict(python_dataset_ncf, model_type):
    """Check the result types of single-pair and list-based ``NCF.predict`` calls."""
    train_df, test_df = python_dataset_ncf
    dataset = Dataset(
        train=train_df, test=test_df, n_neg=N_NEG, n_neg_test=N_NEG_TEST
    )
    ncf = NCF(
        n_users=dataset.n_users, n_items=dataset.n_items, model_type=model_type
    )
    ncf.fit(dataset)

    users = list(test_df[DEFAULT_USER_COL])
    items = list(test_df[DEFAULT_ITEM_COL])

    # One (user, item) pair: the score must be exactly a built-in float.
    single_score = ncf.predict(users[0], items[0])
    assert type(single_score) == float

    # Lists of users/items: one score per test row, returned as a list.
    batch_scores = ncf.predict(users, items, is_list=True)
    assert type(batch_scores) == list
    assert len(batch_scores) == len(test_df)
Ejemplo n.º 3
0
def test_regular_save_load(model_type, n_users, n_items):
    """Save an unfitted NCF model and check a new instance restores the same embeddings.

    One user-side and one item-side embedding matrix are read before saving and
    again after loading the checkpoint into a fresh model; each pair must be
    element-wise equal.
    """

    def _read_embeddings(ncf, user_attr, item_attr):
        """Evaluate the two named embedding variables in ``ncf``'s session."""
        user_emb = ncf.sess.run(getattr(ncf, user_attr))
        item_emb = ncf.sess.run(getattr(ncf, item_attr))
        return user_emb, item_emb

    ckpt = ".%s" % model_type
    # Remove any directory left behind by an earlier run.
    if os.path.exists(ckpt):
        shutil.rmtree(ckpt)

    source = NCF(n_users=n_users, n_items=n_items, model_type=model_type)
    source.save(ckpt)
    if source.model_type == "neumf":
        P, Q = _read_embeddings(source, "embedding_gmf_P", "embedding_mlp_Q")
    elif source.model_type == "gmf":
        P, Q = _read_embeddings(source, "embedding_gmf_P", "embedding_gmf_Q")
    elif source.model_type == "mlp":
        P, Q = _read_embeddings(source, "embedding_mlp_P", "embedding_mlp_Q")

    # Drop the first model before building the one that loads the checkpoint.
    del source
    restored = NCF(n_users=n_users, n_items=n_items, model_type=model_type)

    # Each model type is loaded through its own keyword argument of ``load``.
    if restored.model_type == "neumf":
        restored.load(neumf_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_gmf_P", "embedding_mlp_Q")
    elif restored.model_type == "gmf":
        restored.load(gmf_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_gmf_P", "embedding_gmf_Q")
    elif restored.model_type == "mlp":
        restored.load(mlp_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_mlp_P", "embedding_mlp_Q")

    # The loaded embeddings must match the saved ones exactly.
    assert np.array_equal(P, P_)
    assert np.array_equal(Q, Q_)

    if os.path.exists(ckpt):
        shutil.rmtree(ckpt)
Ejemplo n.º 4
0
def test_neumf_save_load(n_users, n_items):
    """Check that a NeuMF model can be loaded from separate GMF and MLP checkpoints.

    A GMF model and an MLP model are each saved to their own checkpoint
    directory. A NeuMF model is then loaded from both directories, and all four
    of its embedding matrices (GMF user/item, MLP user/item) must equal the
    values read from the models that were saved.
    """
    ckpt_gmf = ".gmf"
    ckpt_mlp = ".mlp"

    def _remove_checkpoints():
        """Delete both checkpoint directories if they exist."""
        for path in (ckpt_gmf, ckpt_mlp):
            if os.path.exists(path):
                shutil.rmtree(path)

    # Start clean in case an earlier run left directories behind.
    _remove_checkpoints()
    try:
        model = NCF(n_users=n_users, n_items=n_items, model_type="gmf")
        model.save(ckpt_gmf)
        P_gmf = model.sess.run(model.embedding_gmf_P)
        Q_gmf = model.sess.run(model.embedding_gmf_Q)
        del model

        model = NCF(n_users=n_users, n_items=n_items, model_type="mlp")
        model.save(ckpt_mlp)
        P_mlp = model.sess.run(model.embedding_mlp_P)
        Q_mlp = model.sess.run(model.embedding_mlp_Q)
        del model

        model = NCF(n_users=n_users, n_items=n_items, model_type="neumf")
        model.load(gmf_dir=ckpt_gmf, mlp_dir=ckpt_mlp)

        P_gmf_ = model.sess.run(model.embedding_gmf_P)
        Q_gmf_ = model.sess.run(model.embedding_gmf_Q)

        P_mlp_ = model.sess.run(model.embedding_mlp_P)
        Q_mlp_ = model.sess.run(model.embedding_mlp_Q)

        # Compare every restored matrix, including the MLP user embedding.
        assert np.array_equal(P_gmf, P_gmf_)
        assert np.array_equal(Q_gmf, Q_gmf_)
        assert np.array_equal(P_mlp, P_mlp_)
        assert np.array_equal(Q_mlp, Q_mlp_)
    finally:
        # Clean up even when a load or an assertion fails.
        _remove_checkpoints()
Ejemplo n.º 5
0
def test_init(model_type, n_users, n_items):
    """Verify the basic attributes and embedding shapes of a newly built NCF model."""
    ncf = NCF(n_users=n_users, n_items=n_items, model_type=model_type)

    # The stored model type is compared against the lower-cased requested type.
    assert ncf.model_type == model_type.lower()
    # User and item counts are stored as given.
    assert ncf.n_users == n_users
    assert ncf.n_items == n_items

    # User embeddings (P) have one row per user, item embeddings (Q) one row per
    # item; all four are expected to have ``n_factors`` columns.
    user_shape = [n_users, ncf.n_factors]
    item_shape = [n_items, ncf.n_factors]
    assert ncf.embedding_gmf_P.shape == user_shape
    assert ncf.embedding_gmf_Q.shape == item_shape
    assert ncf.embedding_mlp_P.shape == user_shape
    assert ncf.embedding_mlp_Q.shape == item_shape
Ejemplo n.º 6
0
def test_predict(python_dataset_ncf, model_type):
    """Check the result types of single-pair and list-based ``NCF.predict`` calls.

    The model is fitted for one epoch only; the test looks at return types and
    lengths, not at prediction quality.
    """
    train_df, test_df = python_dataset_ncf
    dataset = Dataset(
        train=train_df, test=test_df, n_neg=N_NEG, n_neg_test=N_NEG_TEST
    )
    ncf = NCF(
        n_users=dataset.n_users,
        n_items=dataset.n_items,
        model_type=model_type,
        n_epochs=1,
    )
    ncf.fit(dataset)

    users = list(test_df[DEFAULT_USER_COL])
    items = list(test_df[DEFAULT_ITEM_COL])

    # One (user, item) pair: the score must be exactly a built-in float.
    single_score = ncf.predict(users[0], items[0])
    assert type(single_score) == float

    # Lists of users/items: one score per test row, returned as a list.
    batch_scores = ncf.predict(users, items, is_list=True)
    assert type(batch_scores) == list
    assert len(batch_scores) == len(test_df)
Ejemplo n.º 7
0
def test_regular_save_load(model_type, n_users, n_items):
    """Save an unfitted, seeded NCF model and check a new instance restores its embeddings.

    One user-side and one item-side embedding matrix are read before saving and
    again after loading the checkpoint into a fresh model; each pair must be
    element-wise equal.
    """

    def _read_embeddings(ncf, user_attr, item_attr):
        """Evaluate the two named embedding variables in ``ncf``'s session."""
        user_emb = ncf.sess.run(getattr(ncf, user_attr))
        item_emb = ncf.sess.run(getattr(ncf, item_attr))
        return user_emb, item_emb

    ckpt = ".%s" % model_type
    # Remove any directory left behind by an earlier run.
    if os.path.exists(ckpt):
        shutil.rmtree(ckpt)

    # Both model instances are built from the same constructor arguments.
    ncf_kwargs = dict(
        n_users=n_users, n_items=n_items, model_type=model_type, n_epochs=1, seed=SEED
    )

    source = NCF(**ncf_kwargs)
    source.save(ckpt)
    if source.model_type == "neumf":
        P, Q = _read_embeddings(source, "embedding_gmf_P", "embedding_mlp_Q")
    elif source.model_type == "gmf":
        P, Q = _read_embeddings(source, "embedding_gmf_P", "embedding_gmf_Q")
    elif source.model_type == "mlp":
        P, Q = _read_embeddings(source, "embedding_mlp_P", "embedding_mlp_Q")

    # Drop the first model before building the one that loads the checkpoint.
    del source
    restored = NCF(**ncf_kwargs)

    # Each model type is loaded through its own keyword argument of ``load``.
    if restored.model_type == "neumf":
        restored.load(neumf_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_gmf_P", "embedding_mlp_Q")
    elif restored.model_type == "gmf":
        restored.load(gmf_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_gmf_P", "embedding_gmf_Q")
    elif restored.model_type == "mlp":
        restored.load(mlp_dir=ckpt)
        P_, Q_ = _read_embeddings(restored, "embedding_mlp_P", "embedding_mlp_Q")

    # The loaded embeddings must match the saved ones exactly.
    assert np.array_equal(P, P_)
    assert np.array_equal(Q, Q_)

    if os.path.exists(ckpt):
        shutil.rmtree(ckpt)
Ejemplo n.º 8
0
def test_neumf_save_load(n_users, n_items):
    """Check that a NeuMF model can be loaded from separate GMF and MLP checkpoints.

    A GMF model and an MLP model are each saved to their own directory. A NeuMF
    model then loads both, and its four embedding matrices must equal the ones
    read from the saved models.
    """

    def _save_component(kind, user_attr, item_attr):
        """Save a fresh ``kind`` model; return its checkpoint path and two embeddings."""
        ckpt = ".%s" % kind
        # Remove any directory left behind by an earlier run.
        if os.path.exists(ckpt):
            shutil.rmtree(ckpt)
        ncf = NCF(n_users=n_users, n_items=n_items, model_type=kind, n_epochs=1)
        ncf.save(ckpt)
        user_emb = ncf.sess.run(getattr(ncf, user_attr))
        item_emb = ncf.sess.run(getattr(ncf, item_attr))
        # Drop the model before the next one is constructed.
        del ncf
        return ckpt, user_emb, item_emb

    ckpt_gmf, P_gmf, Q_gmf = _save_component(
        "gmf", "embedding_gmf_P", "embedding_gmf_Q"
    )
    ckpt_mlp, P_mlp, Q_mlp = _save_component(
        "mlp", "embedding_mlp_P", "embedding_mlp_Q"
    )

    neumf = NCF(n_users=n_users, n_items=n_items, model_type="neumf", n_epochs=1)
    neumf.load(gmf_dir=ckpt_gmf, mlp_dir=ckpt_mlp)

    P_gmf_ = neumf.sess.run(neumf.embedding_gmf_P)
    Q_gmf_ = neumf.sess.run(neumf.embedding_gmf_Q)
    P_mlp_ = neumf.sess.run(neumf.embedding_mlp_P)
    Q_mlp_ = neumf.sess.run(neumf.embedding_mlp_Q)

    # Every restored matrix must match its saved counterpart exactly.
    assert np.array_equal(P_gmf, P_gmf_)
    assert np.array_equal(Q_gmf, Q_gmf_)
    assert np.array_equal(P_mlp, P_mlp_)
    assert np.array_equal(Q_mlp, Q_mlp_)

    for leftover in (ckpt_gmf, ckpt_mlp):
        if os.path.exists(leftover):
            shutil.rmtree(leftover)
Ejemplo n.º 9
0
def test_fit(python_dataset_ncf, model_type):
    """Smoke test: fitting an NCF model on the fixture data must not raise."""
    train_df, test_df = python_dataset_ncf
    dataset = Dataset(
        train=train_df, test=test_df, n_neg=N_NEG, n_neg_test=N_NEG_TEST
    )
    ncf = NCF(
        n_users=dataset.n_users,
        n_items=dataset.n_items,
        model_type=model_type,
    )
    ncf.fit(dataset)
Ejemplo n.º 10
0
def ncf_training(params):
    """Train a NeuMF model with the given hyper-parameters and report metrics to NNI.

    Args:
        params (dict): Trial configuration. Keys read here: ``datastore``,
            ``train_datapath``, ``validation_datapath``, ``n_factors``,
            ``n_epochs``, ``learning_rate``, ``verbose``, ``rating_metrics``,
            ``ranking_metrics``, ``primary_metric`` and, when ranking metrics
            are requested, ``k``. The whole dict is also passed on to
            ``_update_metrics``.

    Returns:
        NCF: The fitted model.

    Raises:
        ValueError: If both ``rating_metrics`` and ``ranking_metrics`` are empty.
    """
    rating_metrics = params["rating_metrics"]
    ranking_metrics = params["ranking_metrics"]
    # Fail fast: without metrics there is nothing to report, so do not spend
    # time loading data and training first.
    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")

    logger.debug("Start training...")
    # The path is passed positionally because the keyword name of this
    # parameter differs between pandas versions.
    train_data = pd.read_pickle(
        os.path.join(params["datastore"], params["train_datapath"])
    )
    validation_data = pd.read_pickle(
        os.path.join(params["datastore"], params["validation_datapath"])
    )

    data = NCFDataset(train=train_data, test=validation_data, seed=DEFAULT_SEED)

    model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",
        n_factors=params["n_factors"],
        layer_sizes=[16, 8, 4],
        n_epochs=params["n_epochs"],
        learning_rate=params["learning_rate"],
        verbose=params["verbose"],
        seed=DEFAULT_SEED,
    )

    model.fit(data)

    logger.debug("Evaluating...")

    metrics_dict = {}
    if len(rating_metrics) > 0:
        # Score every (user, item) pair that appears in the validation set.
        predictions = [
            [row.userID, row.itemID, model.predict(row.userID, row.itemID)]
            for (_, row) in validation_data.iterrows()
        ]

        predictions = pd.DataFrame(
            predictions, columns=["userID", "itemID", "prediction"]
        )
        predictions = predictions.astype(
            {"userID": "int64", "itemID": "int64", "prediction": "float64"}
        )

        for metric in rating_metrics:
            result = getattr(evaluation, metric)(validation_data, predictions)
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    if len(ranking_metrics) > 0:
        # Score every training user against every training item.
        users, items, preds = [], [], []
        item = list(train_data.itemID.unique())
        for user in train_data.userID.unique():
            user = [user] * len(item)
            users.extend(user)
            items.extend(item)
            preds.extend(list(model.predict(user, item, is_list=True)))

        all_predictions = pd.DataFrame(
            data={"userID": users, "itemID": items, "prediction": preds}
        )

        # Keep only pairs without a training rating, i.e. drop items the user
        # already interacted with in the training data.
        merged = pd.merge(
            train_data, all_predictions, on=["userID", "itemID"], how="outer"
        )
        all_predictions = merged[merged.rating.isnull()].drop("rating", axis=1)
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(
                validation_data,
                all_predictions,
                col_prediction="prediction",
                k=params["k"],
            )
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    # Report the metrics
    nni.report_final_result(metrics_dict)

    # Save the metrics in a JSON file
    output_dir = os.environ.get("NNI_OUTPUT_DIR")
    with open(os.path.join(output_dir, "metrics.json"), "w") as fp:
        temp_dict = metrics_dict.copy()
        # The primary metric is expected under the key "default" (presumably
        # set by _update_metrics -- verify); store it under its real name.
        temp_dict[params["primary_metric"]] = temp_dict.pop("default")
        json.dump(temp_dict, fp)

    return model
def train_ncf(params, data):
    """Build an NCF model sized to ``data`` and time its training.

    Args:
        params (dict): Extra keyword arguments forwarded to the ``NCF`` constructor.
        data: Dataset object exposing ``n_users`` and ``n_items``; passed to ``fit``.

    Returns:
        tuple: The fitted model and the object bound by ``with Timer()`` around
        the ``fit`` call.
    """
    ncf = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    with Timer() as fit_timer:
        ncf.fit(data)
    return ncf, fit_timer
Ejemplo n.º 12
0
def train_ncf(params, data):
    """Build an NCF model sized to ``data`` and time its training.

    Args:
        params (dict): Extra keyword arguments forwarded to the ``NCF`` constructor.
        data: Dataset object exposing ``n_users`` and ``n_items``; passed to ``fit``.

    Returns:
        tuple: The fitted model and the object bound by ``with Timer()`` around
        the ``fit`` call.
    """
    ncf = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    with Timer() as fit_timer:
        ncf.fit(data)
    return ncf, fit_timer