def make_temporal_basket_split(
    self, data=None, test_rate=0.1, n_negative=100, by_user=False, n_test=10
):
    """Split interactions with the ``temporal_basket`` strategy.

    Validates the input frame and delegates the actual splitting to
    ``split_data`` with ``split_type="temporal_basket"``.

    Args:
        data (DataFrame): Interactions to split. When ``None`` (the default),
            the raw interactions are loaded via ``self.load_interaction()``
            and filtered with
            ``filter_user_item_order(data, min_u_c=10, min_o_c=10, min_i_c=10)``.
            Pass an already filtered frame to use a different filter.
        test_rate: Percentage of the data used for testing; the validation
            data takes the same percentage.
        n_negative: Number of negative samples for the testing and
            validation data.
        by_user (bool): ``True`` for a user-based split, ``False`` (default)
            for a global split.
        n_test (int): Number of testing and validation copies. Default 10.

    Returns:
        Whatever ``split_data`` returns, documented as:
        train_data (DataFrame): Interactions for training.
        valid_data (list(DataFrame)): Interactions for validation.
        test_data (list(DataFrame)): Interactions for testing.

    Raises:
        RuntimeError: If ``data`` is not a DataFrame, or if it lacks the
            ``DEFAULT_TIMESTAMP_COL`` or ``DEFAULT_ORDER_COL`` column.
    """
    if data is None:
        # Nothing supplied: fall back to the raw interactions, default-filtered.
        raw_interactions = self.load_interaction()
        data = filter_user_item_order(
            raw_interactions, min_u_c=10, min_o_c=10, min_i_c=10
        )

    # Guard clauses: reject anything the splitter cannot work with.
    if not isinstance(data, pd.DataFrame):
        raise RuntimeError("data is not a type of DataFrame")
    present_columns = data.columns
    if DEFAULT_TIMESTAMP_COL not in present_columns:
        raise RuntimeError("This dataset doesn't have an TIMESTAMP_COL")
    if DEFAULT_ORDER_COL not in present_columns:
        raise RuntimeError("This dataset doesn't have an ORDER_COL")

    split_options = {
        "split_type": "temporal_basket",
        "test_rate": test_rate,
        "n_negative": n_negative,
        "save_dir": self.processed_path,
        "by_user": by_user,
        "n_test": n_test,
    }
    return split_data(data, **split_options)
def make_leave_one_basket(self, data=None, random=False, n_negative=100, n_test=10):
    """Split interactions with the ``leave_one_basket`` strategy.

    Validates the input frame and delegates the actual splitting to
    ``split_data`` with ``split_type="leave_one_basket"`` and a fixed
    ``test_rate=0``.

    Args:
        data (DataFrame): Interactions to split. When ``None`` (the default),
            the raw interactions are loaded via ``self.load_interaction()``
            and filtered with
            ``filter_user_item_order(data, min_u_c=0, min_o_c=3, min_i_c=0)``.
            Pass an already filtered frame to use a different filter.
        random (bool): Whether to randomly leave one basket out as testing.
        n_negative: Number of negative samples for the testing and
            validation data.
        n_test (int): Number of testing and validation copies. Default 10.

    Returns:
        Whatever ``split_data`` returns, documented as:
        train_data (DataFrame): Interactions for training.
        valid_data (list(DataFrame)): Interactions for validation.
        test_data (list(DataFrame)): Interactions for testing.

    Raises:
        RuntimeError: If ``data`` is not a DataFrame, or if it lacks the
            ``DEFAULT_TIMESTAMP_COL`` or ``DEFAULT_ORDER_COL`` column.
    """
    if data is None:
        # Nothing supplied: fall back to the raw interactions, default-filtered.
        raw_interactions = self.load_interaction()
        data = filter_user_item_order(
            raw_interactions, min_u_c=0, min_o_c=3, min_i_c=0
        )

    # Guard clauses: reject anything the splitter cannot work with.
    if not isinstance(data, pd.DataFrame):
        raise RuntimeError("data is not a type of DataFrame")
    present_columns = data.columns
    if DEFAULT_TIMESTAMP_COL not in present_columns:
        raise RuntimeError("This dataset doesn't have an TIMESTAMP_COL")
    if DEFAULT_ORDER_COL not in present_columns:
        raise RuntimeError("This dataset doesn't have an ORDER_COL")

    split_options = {
        "split_type": "leave_one_basket",
        "test_rate": 0,
        "random": random,
        "n_negative": n_negative,
        "save_dir": self.processed_path,
        "n_test": n_test,
    }
    return split_data(data, **split_options)