def load_random_split(
    self, test_rate=0.1, random=False, n_negative=100, by_user=False, n_test=10
):
    """Load the split data generated by random_split.

    The split is looked up in a parameter-specific sub-directory of
    ``<processed_path>/random``. If that sub-directory does not exist yet:

    - for the default parameters (``test_rate == 0.1``, ``random is False``,
      ``n_negative == 100``, ``by_user is False``) the pre-built split is
      downloaded from OneDrive and unzipped;
    - for any other parameters it is generated by ``make_random_split``.

    Args:
        test_rate: percentage of the test data. Note that percentage of the
            validation data will be the same as test data.
        random: bool. Whether randomly leave one basket as testing.
        n_negative: Number of negative samples for testing and validation data.
        by_user: bool. Default False.
            - True: user-based split,
            - False: global split,
        n_test: int. Default 10. The number of testing and validation copies.
            If n_test==0, will load the original (no negative items) valid
            and test datasets.

    Returns:
        train_data (DataFrame): Interaction for training.
        valid_data list(DataFrame): List of interactions for validation
        test_data list(DataFrame): List of interactions for testing
    """
    random_split_root = os.path.join(self.processed_path, "random")
    # makedirs(exist_ok=True) also creates a missing processed_path and does
    # not fail if the directory appears between a check and the creation.
    os.makedirs(random_split_root, exist_ok=True)

    parameterized_path = generate_parameterized_path(
        test_rate=test_rate, random=random, n_negative=n_negative, by_user=by_user
    )
    split_path = os.path.join(random_split_root, parameterized_path)

    if not os.path.exists(split_path):
        uses_default_parameters = (
            test_rate == 0.1
            and random is False
            and n_negative == 100
            and by_user is False
        )
        if uses_default_parameters:
            # default parameters, can be downloaded from Onedrive
            folder = OneDrive(
                url=self.processed_random_split_url, path=random_split_root
            )
            folder.download()
            # the archive is expected next to the target directory, named
            # "<parameterized_path>.zip"
            un_zip(split_path + ".zip", random_split_root)
        else:
            # non-default parameters: build the split locally
            self.make_random_split(
                test_rate=test_rate,
                random=random,
                n_negative=n_negative,
                by_user=by_user,
                n_test=n_test,
            )

    # load data from local storage
    return load_split_data(split_path, n_test=n_test)
def load_leave_one_out(self, random=False, n_negative=100, n_test=10, validate=1):
    """Load the split data generated by leave_one_out.

    The split is looked up in a parameter-specific sub-directory of
    ``<processed_path>/leave_one_out``. If that sub-directory does not exist
    yet, the split is generated locally by ``make_leave_one_out`` (this
    method does not download anything).

    Args:
        random: bool. Whether randomly leave one item as testing.
        n_negative: Number of negative samples for testing and validation data.
        n_test: int. Default 10. The number of testing and validation copies.
            If n_test==0, will load the original (no negative items) valid
            and test datasets.
        validate: Default 1. Forwarded unchanged to
            ``generate_parameterized_path``, ``make_leave_one_out`` and
            ``load_split_data``. NOTE(review): presumably controls the
            validation part of the split -- confirm against those helpers.

    Returns:
        train_data (DataFrame): Interaction for training.
        valid_data list(DataFrame): List of interactions for validation
        test_data list(DataFrame): List of interactions for testing
    """
    leave_one_out_root = os.path.join(self.processed_path, "leave_one_out")
    # makedirs(exist_ok=True) also creates a missing processed_path and does
    # not fail if the directory appears between a check and the creation.
    os.makedirs(leave_one_out_root, exist_ok=True)

    # test_rate and by_user do not apply to this strategy; fixed values are
    # passed to build the directory name.
    parameterized_path = generate_parameterized_path(
        test_rate=0,
        random=random,
        n_negative=n_negative,
        by_user=False,
        validate=validate,
    )
    split_path = os.path.join(leave_one_out_root, parameterized_path)

    if not os.path.exists(split_path):
        self.make_leave_one_out(
            random=random, n_negative=n_negative, n_test=n_test, validate=validate
        )

    # load data from local storage
    return load_split_data(split_path, n_test=n_test, validate=validate)
def load_leave_one_basket(self, random=False, n_negative=100, n_test=10):
    """Load the split data generated by leave_one_basket.

    The split is looked up in a parameter-specific sub-directory of
    ``<processed_path>/leave_one_basket``. If that sub-directory does not
    exist yet:

    - for the default parameters (``random is False`` and
      ``n_negative == 100``) the pre-built split is downloaded from OneDrive
      and unzipped;
    - for any other parameters it is generated by ``make_leave_one_basket``.

    Args:
        random: bool. Whether randomly leave one basket as testing.
        n_negative: Number of negative samples for testing and validation data.
        n_test: int. Default 10. The number of testing and validation copies.
            If n_test==0, will load the original (no negative items) valid
            and test datasets.

    Returns:
        train_data (DataFrame): Interaction for training.
        valid_data list(DataFrame): List of interactions for validation
        test_data list(DataFrame): List of interactions for testing
    """
    leave_one_basket_root = os.path.join(self.processed_path, "leave_one_basket")
    # makedirs(exist_ok=True) also creates a missing processed_path and does
    # not fail if the directory appears between a check and the creation.
    os.makedirs(leave_one_basket_root, exist_ok=True)

    # test_rate and by_user do not apply to this strategy; fixed values are
    # passed to build the directory name.
    parameterized_path = generate_parameterized_path(
        test_rate=0, random=random, n_negative=n_negative, by_user=False
    )
    split_path = os.path.join(leave_one_basket_root, parameterized_path)

    if not os.path.exists(split_path):
        if random is False and n_negative == 100:
            # default parameters, can be downloaded from Onedrive
            folder = OneDrive(
                url=self.processed_leave_one_basket_url, path=leave_one_basket_root
            )
            folder.download()
            # the archive is expected next to the target directory, named
            # "<parameterized_path>.zip"
            un_zip(split_path + ".zip", leave_one_basket_root)
        else:
            # non-default parameters: build the split locally
            self.make_leave_one_basket(
                random=random, n_negative=n_negative, n_test=n_test
            )

    # load data from local storage
    return load_split_data(split_path, n_test=n_test)