def test_fit(python_dataset_ncf, model_type):
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(
        n_users=data.n_users, n_items=data.n_items, model_type=model_type, n_epochs=1
    )
    model.fit(data)

def test_predict(python_dataset_ncf, model_type):
    # test data format
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(n_users=data.n_users, n_items=data.n_items, model_type=model_type)
    model.fit(data)

    test_users, test_items = list(test[DEFAULT_USER_COL]), list(test[DEFAULT_ITEM_COL])

    assert type(model.predict(test_users[0], test_items[0])) == float

    res = model.predict(test_users, test_items, is_list=True)
    assert type(res) == list
    assert len(res) == len(test)

def test_predict(python_dataset_ncf, model_type):
    # test data format
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(
        n_users=data.n_users, n_items=data.n_items, model_type=model_type, n_epochs=1
    )
    model.fit(data)

    test_users, test_items = list(test[DEFAULT_USER_COL]), list(test[DEFAULT_ITEM_COL])

    assert type(model.predict(test_users[0], test_items[0])) == float

    res = model.predict(test_users, test_items, is_list=True)
    assert type(res) == list
    assert len(res) == len(test)

def test_fit(python_dataset_ncf, model_type):
    train, test = python_dataset_ncf
    data = Dataset(train=train, test=test, n_neg=N_NEG, n_neg_test=N_NEG_TEST)
    model = NCF(n_users=data.n_users, n_items=data.n_items, model_type=model_type)
    model.fit(data)

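# ---------------------------------------------------------------------------
# Hedged sketch: the tests above depend on a `python_dataset_ncf` fixture, a
# `model_type` parameter, and the N_NEG / N_NEG_TEST constants defined
# elsewhere (typically in conftest.py). The fixture below is an illustrative
# approximation, not the repository's actual implementation; the column
# names, sizes, and split strategy are assumptions.
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd
import pytest

N_NEG = 5        # assumed number of negative samples per training example
N_NEG_TEST = 10  # assumed number of negative samples per test example

DEFAULT_USER_COL = "userID"  # normally imported from the library's constants
DEFAULT_ITEM_COL = "itemID"


@pytest.fixture(scope="module")
def python_dataset_ncf():
    """Return a small synthetic (train, test) split of user-item interactions."""
    rng = np.random.default_rng(42)
    n_users, n_items = 10, 20
    # Dense user-item grid with random ratings, so every user and (almost
    # surely) every item still appears in the training split below.
    rows = pd.DataFrame(
        {
            DEFAULT_USER_COL: np.repeat(np.arange(n_users), n_items),
            DEFAULT_ITEM_COL: np.tile(np.arange(n_items), n_users),
            "rating": rng.integers(1, 6, size=n_users * n_items).astype(float),
        }
    )
    # Leave-one-out split: hold out one random interaction per user for test.
    test = rows.groupby(DEFAULT_USER_COL).sample(n=1, random_state=42)
    train = rows.drop(test.index)
    return train, test


# The NCF model exposes "GMF", "MLP", and "NeuMF" variants (the training
# script below instantiates "NeuMF"), so the tests are presumably run as:
#
#   @pytest.mark.parametrize("model_type", ["GMF", "MLP", "NeuMF"])
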
def ncf_training(params):
    """Train NCF using the given hyper-parameters."""
    logger.debug("Start training...")
    train_data = pd.read_pickle(
        path=os.path.join(params["datastore"], params["train_datapath"])
    )
    validation_data = pd.read_pickle(
        path=os.path.join(params["datastore"], params["validation_datapath"])
    )
    data = NCFDataset(train=train_data, test=validation_data, seed=DEFAULT_SEED)

    model = NCF(
        n_users=data.n_users,
        n_items=data.n_items,
        model_type="NeuMF",
        n_factors=params["n_factors"],
        layer_sizes=[16, 8, 4],
        n_epochs=params["n_epochs"],
        learning_rate=params["learning_rate"],
        verbose=params["verbose"],
        seed=DEFAULT_SEED,
    )
    model.fit(data)

    logger.debug("Evaluating...")
    metrics_dict = {}

    # Rating metrics: score only the (user, item) pairs in the validation set.
    rating_metrics = params["rating_metrics"]
    if len(rating_metrics) > 0:
        predictions = [
            [row.userID, row.itemID, model.predict(row.userID, row.itemID)]
            for (_, row) in validation_data.iterrows()
        ]
        predictions = pd.DataFrame(
            predictions, columns=["userID", "itemID", "prediction"]
        )
        predictions = predictions.astype(
            {"userID": "int64", "itemID": "int64", "prediction": "float64"}
        )
        for metric in rating_metrics:
            result = getattr(evaluation, metric)(validation_data, predictions)
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    # Ranking metrics: score every (user, item) combination, then drop the
    # pairs already seen in training before computing the top-k metrics.
    ranking_metrics = params["ranking_metrics"]
    if len(ranking_metrics) > 0:
        users, items, preds = [], [], []
        item = list(train_data.itemID.unique())
        for user in train_data.userID.unique():
            user = [user] * len(item)
            users.extend(user)
            items.extend(item)
            preds.extend(list(model.predict(user, item, is_list=True)))
        all_predictions = pd.DataFrame(
            data={"userID": users, "itemID": items, "prediction": preds}
        )
        merged = pd.merge(
            train_data, all_predictions, on=["userID", "itemID"], how="outer"
        )
        all_predictions = merged[merged.rating.isnull()].drop("rating", axis=1)
        for metric in ranking_metrics:
            result = getattr(evaluation, metric)(
                validation_data,
                all_predictions,
                col_prediction="prediction",
                k=params["k"],
            )
            metrics_dict = _update_metrics(metrics_dict, metric, params, result)

    if len(ranking_metrics) == 0 and len(rating_metrics) == 0:
        raise ValueError("No metrics were specified.")

    # Report the metrics to NNI
    nni.report_final_result(metrics_dict)

    # Save the metrics in a JSON file
    output_dir = os.environ.get("NNI_OUTPUT_DIR")
    with open(os.path.join(output_dir, "metrics.json"), "w") as fp:
        temp_dict = metrics_dict.copy()
        temp_dict[params["primary_metric"]] = temp_dict.pop("default")
        json.dump(temp_dict, fp)

    return model

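# ---------------------------------------------------------------------------
# Hedged sketch: an example of the `params` dictionary that `ncf_training`
# expects. The keys are taken from the function body above; the concrete
# values (paths, hyper-parameters, metric names) are illustrative assumptions
# for a typical NNI hyper-parameter tuning run, not prescribed settings.
# ---------------------------------------------------------------------------
example_params = {
    "datastore": "./data",                 # folder holding the pickled splits
    "train_datapath": "train.pkl",         # read with pd.read_pickle
    "validation_datapath": "validation.pkl",
    "n_factors": 8,                        # latent dimension searched by NNI
    "n_epochs": 10,
    "learning_rate": 1e-3,
    "verbose": 1,
    "rating_metrics": ["rmse"],            # each name must exist in `evaluation`
    "ranking_metrics": ["ndcg_at_k"],      # evaluated at cutoff `k`
    "k": 10,
    "primary_metric": "ndcg_at_k",         # stored under "default" for NNI, then
                                           # renamed back when writing metrics.json
}
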
def train_ncf(params, data):
    model = NCF(n_users=data.n_users, n_items=data.n_items, **params)
    # Time only the fit call; the Timer context is returned alongside the model.
    with Timer() as t:
        model.fit(data)
    return model, t

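# ---------------------------------------------------------------------------
# Hedged sketch: how `train_ncf` might be called. `NCFDataset`, `train_data`,
# `validation_data`, and `DEFAULT_SEED` are reused from the snippets above;
# the hyper-parameter values are assumptions, not a recommended configuration.
# Note that `n_users` and `n_items` are filled in from `data` inside
# `train_ncf`, so they must not appear in the params dict.
# ---------------------------------------------------------------------------
ncf_params = {
    "model_type": "NeuMF",
    "n_factors": 4,
    "layer_sizes": [16, 8, 4],
    "n_epochs": 15,
    "learning_rate": 1e-3,
    "verbose": 1,
    "seed": DEFAULT_SEED,
}

data = NCFDataset(train=train_data, test=validation_data, seed=DEFAULT_SEED)
model, train_time = train_ncf(ncf_params, data)  # train_time is the Timer context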