def __init__(self, path):
    '''
    Constructor
    '''
    trainMatrix = self.load_rating_file_as_matrix(path + ".train.rating")
    testRatings = self.load_rating_file_as_matrix(path + ".test.rating")
    testNegatives = self.load_negative_file(path + ".test.negative")
    assert len(testRatings) == len(testNegatives)

    self.num_users, self.num_items = trainMatrix.shape

    from Base.Recommender_utils import reshapeSparse

    self.URM_train = trainMatrix.tocsr()
    self.URM_test = testRatings.tocsr()

    # Pad both matrices to a common shape
    shape = (max(self.URM_train.shape[0], self.URM_test.shape[0]),
             max(self.URM_train.shape[1], self.URM_test.shape[1]))

    self.URM_train = reshapeSparse(self.URM_train, shape)
    self.URM_test = reshapeSparse(self.URM_test, shape)

    # Build the URM of test negative items: one row per user,
    # with a 1.0 for every sampled negative item
    URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=shape[0],
                                                         n_cols=shape[1])

    for user_index in range(len(testNegatives)):
        user_test_items = testNegatives[user_index]
        URM_test_negatives_builder.add_single_row(user_index,
                                                  user_test_items,
                                                  data=1.0)

    self.URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()
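# These readers rely on reshapeSparse from Base.Recommender_utils, which is
# not shown in this section. Below is a minimal sketch of the behavior assumed
# here (padding a sparse matrix to a larger target shape while preserving all
# stored entries); it is an illustration, not the repository's implementation.
import scipy.sparse as sps

def reshapeSparse_sketch(sparse_matrix, new_shape):
    sparse_matrix = sps.coo_matrix(sparse_matrix)
    assert new_shape[0] >= sparse_matrix.shape[0] and \
           new_shape[1] >= sparse_matrix.shape[1], \
        "new_shape must be at least as large as the original shape"
    # Re-build the matrix with the same entries but the larger shape
    return sps.csr_matrix((sparse_matrix.data,
                           (sparse_matrix.row, sparse_matrix.col)),
                          shape=new_shape)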
def split_train_validation_test_VAE_CF(URM_dataframe, n_heldout_users):

    split_dir = "./result_experiments/__Temp_MultiVAE_Splitter/"

    split_train_validation_test_VAE_CF_original(URM_dataframe, split_dir, n_heldout_users)

    train_data, vad_data_tr, vad_data_te, test_data_tr, test_data_te, n_items = load_data_VAE_CF(split_dir)

    # Remove temp files
    shutil.rmtree(split_dir, ignore_errors=True)

    from Base.Recommender_utils import reshapeSparse

    URM_train_only = train_data.copy()
    URM_train_all = sps.vstack([train_data, vad_data_tr, test_data_tr])
    URM_train_all_shape = URM_train_all.shape

    # Offset all row indices so that validation and test users
    # are placed below the training users in the stacked matrix
    n_train_users = train_data.shape[0]

    URM_validation = offset_sparse_matrix_row(vad_data_te, n_train_users)
    n_train_and_validation_users = URM_validation.shape[0]
    URM_validation = reshapeSparse(URM_validation, URM_train_all_shape)

    URM_test = offset_sparse_matrix_row(test_data_te, n_train_and_validation_users)
    URM_test = reshapeSparse(URM_test, URM_train_all_shape)

    URM_train_only = sps.csr_matrix(URM_train_only)
    URM_train_all = sps.csr_matrix(URM_train_all)
    URM_validation = sps.csr_matrix(URM_validation)
    URM_test = sps.csr_matrix(URM_test)

    return URM_train_only, URM_train_all, URM_validation, URM_test
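# offset_sparse_matrix_row is also not shown in this section. A minimal sketch
# of the behavior the splitter above assumes: shift every row index downward by
# offset_row so that held-out users line up below the training users once the
# matrices are stacked. Hypothetical helper, for illustration only.
import scipy.sparse as sps

def offset_sparse_matrix_row_sketch(sparse_matrix, offset_row):
    sparse_matrix = sps.coo_matrix(sparse_matrix)
    return sps.csr_matrix((sparse_matrix.data,
                           (sparse_matrix.row + offset_row, sparse_matrix.col)),
                          shape=(sparse_matrix.shape[0] + offset_row,
                                 sparse_matrix.shape[1]))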
def __init__(self):
    super(NetflixPrizeReader, self).__init__()

    pre_splitted_path = "Data_manager_split_datasets/NetflixPrize/WWW/MultiVAE_our_interface/"
    pre_splitted_filename = "splitted_data"

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("NetflixPrizeReader: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("NetflixPrizeReader: Pre-splitted data not found, building new one")

        from Base.Recommender_utils import reshapeSparse

        data_reader = NetflixPrizeReader_DataManager()
        data_reader.load_data()

        URM_all = data_reader.get_URM_all()

        # Binarize the data (only keep ratings >= 4)
        URM_all.data = URM_all.data >= 4.0
        URM_all.eliminate_zeros()

        URM_all = sps.coo_matrix(URM_all)

        dict_for_dataframe = {"userId": URM_all.row,
                              "movieId": URM_all.col,
                              "rating": URM_all.data}

        URM_all_dataframe = pd.DataFrame(data=dict_for_dataframe)

        self.URM_train, self.URM_train_all, self.URM_validation, self.URM_test = split_train_validation_test_VAE_CF(
            URM_all_dataframe, n_heldout_users=40000)

        n_rows = max(self.URM_train.shape[0], self.URM_train_all.shape[0],
                     self.URM_validation.shape[0], self.URM_test.shape[0])
        n_cols = max(self.URM_train.shape[1], self.URM_train_all.shape[1],
                     self.URM_validation.shape[1], self.URM_test.shape[1])

        newShape = (n_rows, n_cols)

        # Reshape all splits to the same shape
        self.URM_train = reshapeSparse(self.URM_train, newShape)
        self.URM_train_all = reshapeSparse(self.URM_train_all, newShape)
        self.URM_validation = reshapeSparse(self.URM_validation, newShape)
        self.URM_test = reshapeSparse(self.URM_test, newShape)

        data_dict = {
            "URM_train": self.URM_train,
            "URM_train_all": self.URM_train_all,
            "URM_test": self.URM_test,
            "URM_validation": self.URM_validation,
        }

        save_data_dict(data_dict, pre_splitted_path, pre_splitted_filename)

    print("NetflixPrizeReader: Dataset loaded")
def __init__(self, pre_splitted_path):
    super(Movielens1MReader, self).__init__()

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_Movielens1M: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_Movielens1M: Pre-splitted data not found, building new one")

        # Ensure file is loaded as matrix
        Dataset_github.load_rating_file_as_list = Dataset_github.load_rating_file_as_matrix

        dataset = Dataset_github("Conferences/WWW/NeuMF_github/Data/ml-1m")

        URM_train_original, URM_test = dataset.trainMatrix, dataset.testRatings

        URM_train_original = URM_train_original.tocsr()
        URM_test = URM_test.tocsr()

        from Base.Recommender_utils import reshapeSparse

        shape = (max(URM_train_original.shape[0], URM_test.shape[0]),
                 max(URM_train_original.shape[1], URM_test.shape[1]))

        URM_train_original = reshapeSparse(URM_train_original, shape)
        URM_test = reshapeSparse(URM_test, shape)

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=shape[0],
                                                             n_cols=shape[1])

        for user_index in range(len(dataset.testNegatives)):
            user_test_items = dataset.testNegatives[user_index]
            URM_test_negatives_builder.add_single_row(user_index,
                                                      user_test_items,
                                                      data=1.0)

        URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()

        URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
            URM_train_original.copy())

        self.URM_DICT = {
            "URM_train_original": URM_train_original,
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_test_negative": URM_test_negative,
            "URM_validation": URM_validation,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("Dataset_Movielens1M: Dataset loaded")
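# split_train_validation_leave_one_out_user_wise is used above but not shown.
# A minimal sketch of the assumed behavior: for every user, one randomly chosen
# interaction is withheld into the validation matrix and the rest stay in
# train. Illustrative only, not the repository's implementation.
import numpy as np
import scipy.sparse as sps

def leave_one_out_split_sketch(URM, seed=42):
    URM = sps.csr_matrix(URM)
    rng = np.random.default_rng(seed)
    URM_train = URM.copy().tolil()
    rows, cols, data = [], [], []
    for user_id in range(URM.shape[0]):
        user_items = URM.indices[URM.indptr[user_id]:URM.indptr[user_id + 1]]
        if len(user_items) == 0:
            continue
        held_out_item = rng.choice(user_items)
        URM_train[user_id, held_out_item] = 0.0
        rows.append(user_id)
        cols.append(held_out_item)
        data.append(1.0)
    URM_validation = sps.csr_matrix((data, (rows, cols)), shape=URM.shape)
    URM_train = sps.csr_matrix(URM_train)
    URM_train.eliminate_zeros()
    return URM_train, URM_validation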
def __init__(self, split_type="cold_user"):
    super(Movielens20MReader, self).__init__()

    assert split_type in ["cold_user", "warm_user"]

    pre_splitted_path = "Data_manager_split_datasets/Movielens20M/WWW/MultiVAE_our_interface/"
    pre_splitted_filename = "splitted_data" + "_" + split_type

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Movielens20MReader: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Movielens20MReader: Pre-splitted data not found, building new one")

        from Base.Recommender_utils import reshapeSparse

        data_reader = Movielens20MReader_DataManager()
        data_reader.load_data()

        URM_all = data_reader.get_URM_all()

        # Binarize the data (only keep ratings >= 4)
        URM_all.data = URM_all.data >= 4.0
        URM_all.eliminate_zeros()

        if split_type == "cold_user":

            URM_all = sps.coo_matrix(URM_all)

            dict_for_dataframe = {"userId": URM_all.row,
                                  "movieId": URM_all.col,
                                  "rating": URM_all.data}

            URM_all_dataframe = pd.DataFrame(data=dict_for_dataframe)

            self.URM_train, self.URM_train_all, self.URM_validation, self.URM_test = split_train_validation_test_VAE_CF(
                URM_all_dataframe, n_heldout_users=10000)

            n_rows = max(self.URM_train.shape[0], self.URM_train_all.shape[0],
                         self.URM_validation.shape[0], self.URM_test.shape[0])
            n_cols = max(self.URM_train.shape[1], self.URM_train_all.shape[1],
                         self.URM_validation.shape[1], self.URM_test.shape[1])

            newShape = (n_rows, n_cols)

            # Reshape all splits to the same shape
            self.URM_train = reshapeSparse(self.URM_train, newShape)
            self.URM_train_all = reshapeSparse(self.URM_train_all, newShape)
            self.URM_validation = reshapeSparse(self.URM_validation, newShape)
            self.URM_test = reshapeSparse(self.URM_test, newShape)

            data_dict = {
                "URM_train": self.URM_train,
                "URM_train_all": self.URM_train_all,
                "URM_test": self.URM_test,
                "URM_validation": self.URM_validation,
            }

        elif split_type == "warm_user":

            # Keep users with at least 4 interactions
            URM_all = sps.csr_matrix(URM_all)
            users_to_keep = np.ediff1d(URM_all.indptr) >= 4
            URM_all = URM_all[users_to_keep, :]

            # Keep items with at least 1 interaction
            URM_all = sps.csc_matrix(URM_all)
            items_to_keep = np.ediff1d(URM_all.indptr) >= 1
            URM_all = URM_all[:, items_to_keep]

            URM_all = sps.csr_matrix(URM_all)

            self.URM_train, self.URM_validation, self.URM_test, _ = split_train_validation_test_negative_leave_one_out_user_wise(
                URM_all)

            data_dict = {
                "URM_train": self.URM_train,
                "URM_test": self.URM_test,
                "URM_validation": self.URM_validation
            }

        save_data_dict(data_dict, pre_splitted_path, pre_splitted_filename)

    print("Movielens20MReader: Dataset loaded")
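# Why np.ediff1d works for the warm_user filtering above: in a CSR matrix,
# indptr[i + 1] - indptr[i] is the number of stored entries in row i, i.e.
# the interaction count of user i. Small self-contained demo:
import numpy as np
import scipy.sparse as sps

demo = sps.csr_matrix(np.array([[1, 0, 1],
                                [0, 0, 0],
                                [1, 1, 1]]))
print(np.ediff1d(demo.indptr))  # -> [2 0 3], interactions per user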
    # The assignment below is reconstructed from context: the same six values
    # are returned by load_data_VAE_CF in the splitter above, and all of the
    # data matrices are used in the code that follows.
    train_data, vad_data_tr, vad_data_te, test_data_tr, test_data_te, n_items = load_data_VAE_CF(
        output_directory + "split/")

    from Base.Recommender_utils import reshapeSparse

    URM_train_all = sps.vstack([train_data, vad_data_tr, test_data_tr])
    URM_train_all_shape = URM_train_all.shape

    # Offset all row indices so that validation and test users
    # line up below the training users
    n_train_users = train_data.shape[0]

    URM_validation = offset_sparse_matrix_row(vad_data_te, n_train_users)
    n_train_and_validation_users = URM_validation.shape[0]
    URM_validation = reshapeSparse(URM_validation, URM_train_all_shape)

    URM_test = offset_sparse_matrix_row(test_data_te, n_train_and_validation_users)

    ############################################################################
    ##### Set up training hyperparameters

    N = train_data.shape[0]
    idxlist = list(range(N))

    # Training batch size
    batch_size = 500
    batches_per_epoch = int(np.ceil(float(N) / batch_size))

    N_vad = vad_data_tr.shape[0]
    idxlist_vad = list(range(N_vad))
def __init__(self, pre_splitted_path, dataset_variant="a", train_interactions=1):
    super(CiteulikeReader, self).__init__()

    assert dataset_variant in ["a", "t"], \
        "CiteulikeReader: dataset_variant must be either 'a' or 't'"
    assert train_interactions in [1, 10, "all"], \
        "CiteulikeReader: train_interactions must be: 1, 10 or 'all'"

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    original_data_path = "Conferences/KDD/CollaborativeVAE_github/data/citeulike-{}/".format(dataset_variant)

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("CiteulikeReader: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("CiteulikeReader: Pre-splitted data not found, building new one")

        print("CiteulikeReader: loading URM")

        if train_interactions == "all":
            train_interactions_file_suffix = 10
        else:
            train_interactions_file_suffix = train_interactions

        URM_train_builder = self._load_data_file(
            original_data_path + "cf-train-{}-users.dat".format(train_interactions_file_suffix))
        URM_test_builder = self._load_data_file(
            original_data_path + "cf-test-{}-users.dat".format(train_interactions_file_suffix))

        URM_test = URM_test_builder.get_SparseMatrix()
        URM_train = URM_train_builder.get_SparseMatrix()

        if dataset_variant == "a":
            ICM_tokens_TFIDF = scipy.io.loadmat(original_data_path + "mult_nor.mat")['X']
        else:
            # Variant "t" uses a different file format and is transposed
            ICM_tokens_TFIDF = h5py.File(original_data_path + "mult_nor.mat", "r").get('X')
            ICM_tokens_TFIDF = sps.csr_matrix(ICM_tokens_TFIDF).T

        ICM_tokens_TFIDF = sps.csr_matrix(ICM_tokens_TFIDF)

        ICM_tokens_bool = ICM_tokens_TFIDF.copy()
        ICM_tokens_bool.data = np.ones_like(ICM_tokens_bool.data)

        from Base.Recommender_utils import reshapeSparse

        n_rows = max(URM_test.shape[0], URM_train.shape[0])
        n_cols = max(URM_test.shape[1], URM_train.shape[1], ICM_tokens_TFIDF.shape[0])

        newShape = (n_rows, n_cols)

        URM_test = reshapeSparse(URM_test, newShape)
        URM_train = reshapeSparse(URM_train, newShape)

        if train_interactions == "all":
            URM_train += URM_test

            URM_train, URM_test = split_train_validation_percentage_random_holdout(
                URM_train, train_percentage=0.8)
            URM_train, URM_validation = split_train_validation_percentage_random_holdout(
                URM_train.copy(), train_percentage=0.8)

        elif train_interactions == 10:
            # If train_interactions == 10 the train will NOT contain the validation data
            URM_train, URM_validation = split_train_validation_percentage_random_holdout(
                URM_train.copy(), train_percentage=0.8)

        else:
            # If train_interactions == 1 the train WILL contain the validation data
            _, URM_validation = split_train_validation_percentage_random_holdout(
                URM_train.copy(), train_percentage=0.8)

        self.ICM_DICT = {
            "ICM_tokens_TFIDF": ICM_tokens_TFIDF,
            "ICM_tokens_bool": ICM_tokens_bool,
        }

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("CiteulikeReader: loading complete")
def __init__(self, dataset_variant="a", train_interactions=1):
    super(CiteulikeReader, self).__init__()

    assert dataset_variant in ["a", "t"], \
        "CiteulikeReader: dataset_variant must be either 'a' or 't'"
    assert train_interactions in [1, 10, "all"], \
        "CiteulikeReader: train_interactions must be: 1, 10 or 'all'"

    pre_splitted_path = "Data_manager_split_datasets/CiteULike/KDD/CollaborativeVAE_our_interface/"
    pre_splitted_filename = "splitted_data_citeulike-{}-{}-items".format(
        dataset_variant, train_interactions)

    original_data_path = "Conferences/KDD/CollaborativeVAE_github/data/citeulike-{}/".format(dataset_variant)

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("CiteulikeReader: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("CiteulikeReader: Pre-splitted data not found, building new one")

        print("CiteulikeReader: loading URM")

        if train_interactions == "all":
            train_interactions_file_suffix = 10
        else:
            train_interactions_file_suffix = train_interactions

        URM_train_builder = self._load_data_file(
            original_data_path + "cf-train-{}-users.dat".format(train_interactions_file_suffix))
        URM_test_builder = self._load_data_file(
            original_data_path + "cf-test-{}-users.dat".format(train_interactions_file_suffix))

        self.URM_test = URM_test_builder.get_SparseMatrix()
        self.URM_train = URM_train_builder.get_SparseMatrix()

        if dataset_variant == "a":
            self.ICM_title_abstract = scipy.io.loadmat(original_data_path + "mult_nor.mat")['X']
        else:
            # Variant "t" uses a different file format and is transposed
            self.ICM_title_abstract = h5py.File(original_data_path + "mult_nor.mat", "r").get('X')
            self.ICM_title_abstract = sps.csr_matrix(self.ICM_title_abstract).T

        self.ICM_title_abstract = sps.csr_matrix(self.ICM_title_abstract)

        from Base.Recommender_utils import reshapeSparse

        n_rows = max(self.URM_test.shape[0], self.URM_train.shape[0])
        n_cols = max(self.URM_test.shape[1], self.URM_train.shape[1],
                     self.ICM_title_abstract.shape[0])

        newShape = (n_rows, n_cols)

        self.URM_test = reshapeSparse(self.URM_test, newShape)
        self.URM_train = reshapeSparse(self.URM_train, newShape)

        if train_interactions == "all":
            self.URM_train += self.URM_test

            self.URM_train, self.URM_test = split_train_validation_percentage_random_holdout(
                self.URM_train, train_percentage=0.8)
            self.URM_train, self.URM_validation = split_train_validation_percentage_random_holdout(
                self.URM_train, train_percentage=0.8)

        else:
            self.URM_train, self.URM_validation = split_train_validation_percentage_random_holdout(
                self.URM_train, train_percentage=0.8)

        data_dict = {
            "URM_train": self.URM_train,
            "URM_test": self.URM_test,
            "URM_validation": self.URM_validation,
            "ICM_title_abstract": self.ICM_title_abstract
        }

        save_data_dict(data_dict, pre_splitted_path, pre_splitted_filename)

    print("CiteulikeReader: loading complete")
def __init__(self, pre_splitted_path, type="original"):

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_{}: Attempting to load pre-splitted data".format(self.DATASET_NAME))

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_{}: Pre-splitted data not found, building new one".format(self.DATASET_NAME))

        if type == "original":
            # Ensure file is loaded as matrix
            Dataset_github.load_rating_file_as_list = Dataset_github.load_rating_file_as_matrix

            dataset = Dataset_github("Conferences/IJCAI/DELF_original/Data/ml-1m")

            URM_train, URM_validation, URM_test, testNegatives = dataset.trainMatrix, dataset.validRatings, \
                                                                 dataset.testRatings, dataset.testNegatives

            URM_train = URM_train.tocsr()
            URM_validation = URM_validation.tocsr()
            URM_test = URM_test.tocsr()

            # The original split carries no timestamp information
            URM_timestamp = "no"

            from Base.Recommender_utils import reshapeSparse

            shape = (max(URM_train.shape[0], URM_validation.shape[0], URM_test.shape[0]),
                     max(URM_train.shape[1], URM_validation.shape[1], URM_test.shape[1]))

            URM_train = reshapeSparse(URM_train, shape)
            URM_validation = reshapeSparse(URM_validation, shape)
            URM_test = reshapeSparse(URM_test, shape)

            URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=shape[0],
                                                                 n_cols=shape[1])

            for user_index in range(len(dataset.testNegatives)):
                user_test_items = dataset.testNegatives[user_index]
                URM_test_negatives_builder.add_single_row(user_index,
                                                          user_test_items,
                                                          data=1.0)

            URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()

        elif type == "ours":
            # Create the split from the ORIGINAL full dataset,
            # leave-one-out time wise
            data_reader = Movielens1MReader_DataManager()
            loaded_dataset = data_reader.load_data()

            URM_all = loaded_dataset.get_URM_from_name("URM_all")
            URM_timestamp = loaded_dataset.get_URM_from_name("URM_timestamp")

            # Make the ratings implicit
            URM_all.data = np.ones_like(URM_all.data)

            URM_train, URM_validation, URM_test, URM_test_negative = split_data_on_timestamp(
                URM_all, URM_timestamp, negative_items_per_positive=99)

        else:
            assert False, "type must be either 'original' or 'ours'"

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
            "URM_test_negative": URM_test_negative,
            "URM_timestamp": URM_timestamp,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("{}: Dataset loaded".format(self.DATASET_NAME))

    print_stat_datareader(self)
def _load_from_original_file(self):
    # Load data from original source
    self._print("Loading original data")

    zipFile_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER

    try:
        dataFile = zipfile.ZipFile(zipFile_path + "neural_factorization_machine-master.zip")

    except (FileNotFoundError, zipfile.BadZipFile):
        self._print("Unable to find data zip file. Downloading...")

        download_from_URL(self.DATASET_URL, zipFile_path,
                          "neural_factorization_machine-master.zip")

        dataFile = zipfile.ZipFile(zipFile_path + "neural_factorization_machine-master.zip")

    inner_path_in_zip = "neural_factorization_machine-master/data/frappe/"

    URM_train_path = dataFile.extract(inner_path_in_zip + "frappe.train.libfm",
                                      path=zipFile_path + "decompressed/")
    URM_test_path = dataFile.extract(inner_path_in_zip + "frappe.test.libfm",
                                     path=zipFile_path + "decompressed/")
    URM_validation_path = dataFile.extract(inner_path_in_zip + "frappe.validation.libfm",
                                           path=zipFile_path + "decompressed/")

    # Load the three splits, re-using the same ID-to-index mappers
    # so indices are consistent across train, test and validation
    tmp_URM_train, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
        URM_train_path,
        item_original_ID_to_index=None,
        user_original_ID_to_index=None)

    tmp_URM_test, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
        URM_test_path,
        item_original_ID_to_index=item_original_ID_to_index,
        user_original_ID_to_index=user_original_ID_to_index)

    tmp_URM_validation, item_original_ID_to_index, user_original_ID_to_index = self._loadURM(
        URM_validation_path,
        item_original_ID_to_index=item_original_ID_to_index,
        user_original_ID_to_index=user_original_ID_to_index)

    shape = (len(user_original_ID_to_index), len(item_original_ID_to_index))

    tmp_URM_train = reshapeSparse(tmp_URM_train, shape)
    tmp_URM_test = reshapeSparse(tmp_URM_test, shape)
    tmp_URM_validation = reshapeSparse(tmp_URM_validation, shape)

    URM_occurrence = tmp_URM_train + tmp_URM_test + tmp_URM_validation

    URM_all = URM_occurrence.copy()
    URM_all.data = np.ones_like(URM_all.data)

    loaded_URM_dict = {"URM_all": URM_all,
                       "URM_occurrence": URM_occurrence}

    loaded_dataset = Dataset(dataset_name=self._get_dataset_name(),
                             URM_dictionary=loaded_URM_dict,
                             ICM_dictionary=None,
                             ICM_feature_mapper_dictionary=None,
                             UCM_dictionary=None,
                             UCM_feature_mapper_dictionary=None,
                             user_original_ID_to_index=user_original_ID_to_index,
                             item_original_ID_to_index=item_original_ID_to_index,
                             is_implicit=self.IS_IMPLICIT)

    self._print("cleaning temporary files")

    shutil.rmtree(zipFile_path + "decompressed", ignore_errors=True)

    self._print("loading complete")

    return loaded_dataset
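# The frappe files above are in libfm format, where each line reads
# "label feature_id:value feature_id:value ...". A hedged sketch of how one
# line could be parsed, assuming (as _loadURM appears to) that the first two
# features encode the user and the item; this helper is illustrative, not the
# repository's actual parser.
def parse_libfm_line_sketch(line):
    tokens = line.strip().split(" ")
    label = float(tokens[0])
    # Keep only the feature ids, dropping the ":value" part
    feature_ids = [token.split(":")[0] for token in tokens[1:]]
    user_id, item_id = feature_ids[0], feature_ids[1]  # assumed ordering
    return label, user_id, item_id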
def __init__(self):
    super(PinterestICCVReader, self).__init__()

    pre_splitted_path = "Data_manager_split_datasets/PinterestICCV/WWW/NeuMF_our_interface/"
    pre_splitted_filename = "splitted_data"

    # If directory does not exist, create
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_Pinterest: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_Pinterest: Pre-splitted data not found, building new one")

        # Ensure file is loaded as matrix
        Dataset_github.load_rating_file_as_list = Dataset_github.load_rating_file_as_matrix

        dataset = Dataset_github("Conferences/WWW/NeuMF_github/Data/pinterest-20")

        self.URM_train_original, self.URM_test = dataset.trainMatrix, dataset.testRatings

        self.URM_train_original = self.URM_train_original.tocsr()
        self.URM_test = self.URM_test.tocsr()

        from Base.Recommender_utils import reshapeSparse

        shape = (max(self.URM_train_original.shape[0], self.URM_test.shape[0]),
                 max(self.URM_train_original.shape[1], self.URM_test.shape[1]))

        self.URM_train_original = reshapeSparse(self.URM_train_original, shape)
        self.URM_test = reshapeSparse(self.URM_test, shape)

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=shape[0],
                                                             n_cols=shape[1])

        for user_index in range(len(dataset.testNegatives)):
            user_test_items = dataset.testNegatives[user_index]
            URM_test_negatives_builder.add_single_row(user_index,
                                                      user_test_items,
                                                      data=1.0)

        self.URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()

        self.URM_train, self.URM_validation = split_train_validation_leave_one_out_user_wise(
            self.URM_train_original.copy())

        data_dict = {
            "URM_train_original": self.URM_train_original,
            "URM_train": self.URM_train,
            "URM_test": self.URM_test,
            "URM_test_negative": self.URM_test_negative,
            "URM_validation": self.URM_validation,
        }

        save_data_dict(data_dict, pre_splitted_path, pre_splitted_filename)

    print("Dataset_Pinterest: Dataset loaded")

    print("N_items {}, n_users {}".format(self.URM_train.shape[1],
                                          self.URM_train.shape[0]))
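# A hedged sketch of what the URM_test_negative construction used throughout
# these readers is assumed to produce: a sparse matrix with a 1.0 at
# (user, item) for every sampled negative item. Equivalent plain-scipy
# construction, for illustration only:
import scipy.sparse as sps

def build_negative_urm_sketch(test_negatives, shape):
    rows, cols = [], []
    for user_index, user_test_items in enumerate(test_negatives):
        rows.extend([user_index] * len(user_test_items))
        cols.extend(user_test_items)
    return sps.csr_matrix(([1.0] * len(rows), (rows, cols)), shape=shape)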