def __init__(self, config):
    """Cache, per meta dataset, the artifact file paths and id mappings.

    For every name in ``config['metas']`` this records three ``.pt`` pickle
    paths (bought / id / cat mappings) rooted at that meta's ``pp``
    directory, and eagerly loads the two JSON id maps via
    ``load_dict_output``.

    Args:
        config: dict with a 'metas' list of meta names; each meta name is
            itself a key in *config* whose value holds a 'pp' directory path.
    """
    self.config = config
    self.meta_files = {}
    self.all_asin_to_titles = {}
    for meta_name in self.config['metas']:
        meta_cfg = self.config[meta_name]
        base_dir = meta_cfg['pp']
        self.meta_files[meta_name] = {
            'bought': base_dir + '/' + meta_name + '_meta_id_bought_mapping.pt',
            'id': base_dir + '/' + meta_name + '_meta_id_item_mapping.pt',
            'cat': base_dir + '/' + meta_name + '_meta_id_cat_map.pt',
            # Loaded eagerly: item->id and id->item JSON maps for this meta.
            'idx': load_dict_output(base_dir, 'item_id_map.json'),
            'id_item': load_dict_output(base_dir, 'id_item_map.json'),
        }
def create_title_id_meta_mapping(name, pp_data_dir, meta_df):
    """Merge asin->title pairs from *meta_df* into the id->item map and pickle it.

    Loads the existing ``id_item_map.json`` from *pp_data_dir*, overlays one
    entry per row of *meta_df* mapping the row's 'asin' to its 'title', and
    writes the merged dict to ``<pp_data_dir>/<name>_meta_id_item_mapping.pt``.

    Args:
        name: meta-dataset name used as the output filename prefix.
        pp_data_dir: preprocessed-data directory holding id_item_map.json.
        meta_df: pandas DataFrame with at least 'asin' and 'title' columns.
    """
    # Bug fix: the valid orient is 'records' — older pandas tolerated the
    # singular 'record' with a warning, modern pandas raises ValueError.
    records = meta_df.to_dict('records')
    item_mapping = load_dict_output(pp_data_dir, 'id_item_map.json')
    # asin entries overwrite any colliding keys in the loaded map.
    asin_mapping = {record['asin']: record['title'] for record in records}
    item_mapping.update(asin_mapping)
    out_path = pp_data_dir + '/' + name + '_meta_id_item_mapping.pt'
    with open(out_path, 'wb') as f:
        pickle.dump(item_mapping, f)
}  # NOTE(review): closes a dict literal begun before this chunk — presumably a params dict; confirm against the full file.

# Resolve the preprocessed-choice data directory for the requested dataset.
print("Reading dataset")
if args.dataset == "movielens":
    data_dir = cfg.vals['movielens_dir'] + "/preprocessed_choice/"
elif args.dataset == "amazon":
    data_dir = cfg.vals['amazon_dir'] + "/preprocessed_choice/"
else:
    raise ValueError("--dataset must be 'amazon' or 'movielens'")

X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)
print("Dataset read complete...")

# NOTE(review): the trailing True presumably asks load_dict_output to convert
# JSON string keys back to ints — confirm against its definition.
user_item_rating_map = load_dict_output(data_dir, "user_item_rating.json", True)
item_rating_map = load_dict_output(data_dir, "item_rating.json", True)
stats = load_dict_output(data_dir, "stats.json")
print("n users: {}".format(stats['n_users']))
print("n items: {}".format(stats['n_items']))

# Trim the test split so its row count divides evenly into TEST_BATCH_SIZE.
n_test = get_test_sample_size(X_test.shape[0], k=TEST_BATCH_SIZE)
X_test = X_test[:n_test, :]
y_test = y_test[:n_test, :]

# Build the MLP model from dataset stats and run params.
mlp = MLP({
    'num_users': stats['n_users'],
    'num_items': stats['n_items'],
    'latent_dim': params["h_dim_size"],
    'use_cuda': args.cuda,
    # NOTE(review): this config dict (and the MLP(...) call) continues past
    # the end of this chunk.
import torch
import pandas as pd
import numpy as np
from model.trainer import NeuralUtilityTrainer
from model._loss import loss_mse
from sklearn.preprocessing import OneHotEncoder
from experiments.utils import get_mrs_arr, get_supp_k, get_comp_k
from preprocessing.utils import load_dict_output

# Set model and data paths manually
# TODO: Ahmad to modify for his application
# NOTE(review): `cfg` and `read_train_test_dir` are used below but not
# imported in this chunk — presumably brought in elsewhere in the file.
MODEL_PATH = cfg.vals[
    "model_dir"] + "/home_kitchen_ahmad/item_encoder_amazon_utility_done.pt"
data_dir = cfg.vals['amazon_dir'] + "/preprocessed_home_kitchen/"

# Bidirectional item-id lookups; the trailing True presumably converts JSON
# string keys back to ints — confirm against load_dict_output's definition.
item_map = load_dict_output(
    cfg.vals["amazon_dir"] + "/preprocessed_home_kitchen/",
    "id_item_map.json", True)
item_to_idx = load_dict_output(
    cfg.vals["amazon_dir"] + "/preprocessed_home_kitchen/",
    "item_id_map.json", True)

# Process data
X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)
stats = load_dict_output(data_dir, "stats.json")

# Dense identity-like matrix: one one-hot row per item id, used as the
# inputs over which item gradients are taken.
one_hot = OneHotEncoder(categories=[range(stats["n_items"])])
items_for_grad = one_hot.fit_transform(
    np.arange(stats["n_items"]).reshape(-1, 1)).todense().astype(np.float32)

# Re-attach targets as the last column of each split.
train = np.concatenate([X_train, y_train], axis=1)
test = np.concatenate([X_test, y_test], axis=1)
# Resolve the preprocessed data directory for the requested dataset.
# NOTE(review): `args`, `cfg`, `model`, `TEST_BATCH_SIZE`, `Predictor`, and
# the helper functions are defined outside this chunk.
print("Reading dataset")
if args.dataset == "movielens":
    data_dir = cfg.vals['movielens_dir'] + "/preprocessed/"
elif args.dataset == "amazon":
    data_dir = cfg.vals['amazon_dir'] + "/preprocessed/"
else:
    raise ValueError("--dataset must be 'amazon' or 'movielens'")

X_train, X_test, y_train, y_test = read_train_test_dir(data_dir)
stats = load_dict_output(data_dir, "stats.json")
print("Dataset read complete...")

# Trim the test split so its row count divides evenly into TEST_BATCH_SIZE.
n_test = get_test_sample_size(X_test.shape[0], k=TEST_BATCH_SIZE)
X_test = X_test[:n_test, :]
y_test = y_test[:n_test, :]

# Column 0 is the user id, column 1 the item id; reshape everything to
# (n, 1) column vectors for the Predictor.
users_test = X_test[:, 0].reshape(-1, 1)
items_test = X_test[:, 1].reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

predictor = Predictor(model=model, batch_size=TEST_BATCH_SIZE,
                      users=users_test, items=items_test,
                      y=y_test, use_cuda=args.cuda,
                      n_items=stats["n_items"])