Example #1
0
    def load_random_split(self,
                          test_rate=0.1,
                          random=False,
                          n_negative=100,
                          by_user=False,
                          n_test=10):
        """Load the split data generated by random_split.

        The split is looked up in a parameter-specific sub-directory of
        ``<self.processed_path>/random``. If that directory does not exist yet:

        - with the default parameters (``test_rate == 0.1``, ``random is False``,
          ``n_negative == 100``, ``by_user is False``) the prepared archive is
          downloaded through ``OneDrive`` from ``self.processed_random_split_url``
          and unzipped next to it;
        - with any other parameters the split is built locally by
          ``self.make_random_split``.

        Args:
            test_rate: percentage of the test data. Note that percentage of the
                validation data will be the same as test data.
            random: bool. Forwarded to ``make_random_split`` and encoded in the
                directory name (presumably toggles random selection of the
                held-out interactions -- confirm against ``make_random_split``).
            n_negative: Number of negative samples for testing and validation
                data.
            by_user: bool. Default False.
                - True: user-based split,
                - False: global split.
            n_test: int. Default 10. The number of testing and validation
                copies. If n_test==0, will load the original (no negative
                items) valid and test datasets.

        Returns:
            The value returned by ``load_split_data`` for the split directory,
            expected to be:
            train_data (DataFrame): Interaction for training.
            valid_data list(DataFrame): List of interactions for validation
            test_data list(DataFrame): List of interactions for testing
        """

        split_root = os.path.join(self.processed_path, "random")
        # makedirs(exist_ok=True) also creates a missing processed_path and
        # does not fail when the directory appears between a check and the
        # creation, unlike the exists()/mkdir() pair.
        os.makedirs(split_root, exist_ok=True)

        # NOTE: n_test is not part of the directory name, so a split that is
        # already on disk is reused regardless of the n_test it was built with.
        parameterized_path = generate_parameterized_path(test_rate=test_rate,
                                                         random=random,
                                                         n_negative=n_negative,
                                                         by_user=by_user)
        processed_random_split_path = os.path.join(split_root,
                                                   parameterized_path)
        if not os.path.exists(processed_random_split_path):
            if (test_rate == 0.1 and random is False and n_negative == 100
                    and by_user is False):
                # default parameters, can be downloaded from Onedrive
                folder = OneDrive(url=self.processed_random_split_url,
                                  path=split_root)
                folder.download()
                # the archive is expected at "<split directory>.zip"
                un_zip(processed_random_split_path + ".zip", split_root)
            else:
                # non-default parameters: build the split locally
                self.make_random_split(
                    test_rate=test_rate,
                    random=random,
                    n_negative=n_negative,
                    by_user=by_user,
                    n_test=n_test,
                )

        # load data from local storage
        return load_split_data(processed_random_split_path, n_test=n_test)
Example #2
0
    def load_leave_one_out(self,
                           random=False,
                           n_negative=100,
                           n_test=10,
                           validate=1):
        """Load the split data generated by leave_one_out.

        The split is looked up in a parameter-specific sub-directory of
        ``<self.processed_path>/leave_one_out``. If that directory does not
        exist yet, the split is built locally by ``self.make_leave_one_out``;
        nothing is downloaded by this method.

        Args:
            random: bool. Whether randomly leave one item as testing.
            n_negative: Number of negative samples for testing and validation
                data.
            n_test: int. Default 10. The number of testing and validation
                copies. If n_test==0, will load the original (no negative
                items) valid and test datasets.
            validate: Default 1. Encoded in the directory name and forwarded
                unchanged to ``make_leave_one_out`` and ``load_split_data``
                (presumably controls the validation data -- confirm against
                those helpers).

        Returns:
            The value returned by ``load_split_data`` for the split directory,
            expected to be:
            train_data (DataFrame): Interaction for training.
            valid_data list(DataFrame): List of interactions for validation
            test_data list(DataFrame): List of interactions for testing
        """

        split_root = os.path.join(self.processed_path, "leave_one_out")
        # makedirs(exist_ok=True) also creates a missing processed_path and
        # does not fail when the directory appears between a check and the
        # creation, unlike the exists()/mkdir() pair.
        os.makedirs(split_root, exist_ok=True)

        # test_rate and by_user are fixed for this split type.
        # NOTE: n_test is not part of the directory name, so a split that is
        # already on disk is reused regardless of the n_test it was built with.
        parameterized_path = generate_parameterized_path(
            test_rate=0,
            random=random,
            n_negative=n_negative,
            by_user=False,
            validate=validate,
        )

        processed_leave_one_out_path = os.path.join(split_root,
                                                    parameterized_path)

        if not os.path.exists(processed_leave_one_out_path):
            self.make_leave_one_out(random=random,
                                    n_negative=n_negative,
                                    n_test=n_test,
                                    validate=validate)

        # load data from local storage
        return load_split_data(processed_leave_one_out_path,
                               n_test=n_test,
                               validate=validate)
Example #3
0
    def load_leave_one_basket(self, random=False, n_negative=100, n_test=10):
        """Load the split data generated by leave_one_basket.

        The split is looked up in a parameter-specific sub-directory of
        ``<self.processed_path>/leave_one_basket``. If that directory does not
        exist yet:

        - with the default parameters (``random is False`` and
          ``n_negative == 100``) the prepared archive is downloaded through
          ``OneDrive`` from ``self.processed_leave_one_basket_url`` and unzipped
          next to it;
        - with any other parameters the split is built locally by
          ``self.make_leave_one_basket``.

        Args:
            random: bool. Whether randomly leave one basket as testing.
            n_negative: Number of negative samples for testing and validation
                data.
            n_test: int. Default 10. The number of testing and validation
                copies. If n_test==0, will load the original (no negative
                items) valid and test datasets.

        Returns:
            The value returned by ``load_split_data`` for the split directory,
            expected to be:
            train_data (DataFrame): Interaction for training.
            valid_data list(DataFrame): List of interactions for validation
            test_data list(DataFrame): List of interactions for testing
        """

        split_root = os.path.join(self.processed_path, "leave_one_basket")
        # makedirs(exist_ok=True) also creates a missing processed_path and
        # does not fail when the directory appears between a check and the
        # creation, unlike the exists()/mkdir() pair.
        os.makedirs(split_root, exist_ok=True)

        # test_rate and by_user are fixed for this split type.
        # NOTE: n_test is not part of the directory name, so a split that is
        # already on disk is reused regardless of the n_test it was built with.
        parameterized_path = generate_parameterized_path(test_rate=0,
                                                         random=random,
                                                         n_negative=n_negative,
                                                         by_user=False)
        processed_leave_one_basket_path = os.path.join(split_root,
                                                       parameterized_path)
        if not os.path.exists(processed_leave_one_basket_path):
            if random is False and n_negative == 100:
                # default parameters, can be downloaded from Onedrive
                folder = OneDrive(url=self.processed_leave_one_basket_url,
                                  path=split_root)
                folder.download()
                # the archive is expected at "<split directory>.zip"
                un_zip(processed_leave_one_basket_path + ".zip", split_root)
            else:
                # non-default parameters: build the split locally
                self.make_leave_one_basket(random=random,
                                           n_negative=n_negative,
                                           n_test=n_test)

        # load data from local storage
        return load_split_data(processed_leave_one_basket_path, n_test=n_test)