def __init__(self,
              train: Tuple[np.ndarray, np.ndarray],
              val: Optional[Tuple[np.ndarray, np.ndarray]] = None):
     """
     Set up a dataset for the "time_series_regression" task.

     :param train: Tuple of two arrays holding the training data
     :param val: Optional tuple of two arrays holding the validation data
     """
     # Validate the raw inputs before they reach the base class.
     _check_time_series_inputs(
         train=train, val=val, task_type="time_series_regression")
     super().__init__(train_tensors=train, val_tensors=val, shuffle=True)

     # Cross-validation split types requested for this task.
     cross_val_types = (
         CrossValTypes.k_fold_cross_validation,
         CrossValTypes.shuffle_split_cross_validation,
     )
     self.cross_validators = get_cross_validators(*cross_val_types)

     # Holdout split types requested for this task.
     holdout_types = (HoldoutValTypes.holdout_validation,)
     self.holdout_validators = get_holdout_validators(*holdout_types)
 def __init__(self,
              train: TIME_SERIES_CLASSIFICATION_INPUT,
              val: Optional[TIME_SERIES_CLASSIFICATION_INPUT] = None):
     """
     Set up a dataset for the "time_series_classification" task.

     :param train: The training data
     :param val: Optional validation data
     """
     # Validate the raw inputs before they reach the base class.
     _check_time_series_inputs(
         train=train, val=val, task_type="time_series_classification")
     super().__init__(train_tensors=train, val_tensors=val, shuffle=True)

     # Cross-validation split types requested for this task
     # (stratified and non-stratified variants).
     cross_val_types = (
         CrossValTypes.stratified_k_fold_cross_validation,
         CrossValTypes.k_fold_cross_validation,
         CrossValTypes.shuffle_split_cross_validation,
         CrossValTypes.stratified_shuffle_split_cross_validation,
     )
     self.cross_validators = get_cross_validators(*cross_val_types)

     # Holdout split types requested for this task.
     holdout_types = (
         HoldoutValTypes.holdout_validation,
         HoldoutValTypes.stratified_holdout_validation,
     )
     self.holdout_validators = get_holdout_validators(*holdout_types)
    def __init__(self, X: Any, Y: Any,
                 X_test: Optional[Union[np.ndarray, pd.DataFrame]] = None,
                 Y_test: Optional[Union[np.ndarray, pd.DataFrame]] = None):
        """
        Build a tabular classification dataset from features and targets.

        :param X: Training features; interpreted via ``interpret_columns``
        :param Y: Training targets; when not None they are interpreted with
            ``assert_single_column=True`` and converted with ``check_array``
        :param X_test: Optional test features. They are NOT used for optimization,
            only to track performance on the test data over time
        :param Y_test: Optional test targets; only processed when ``X_test`` is
            also provided
        :raises ValueError: If the data types inferred for ``X_test`` differ from
            the data types inferred for ``X``
        """
        X, self.data_types, self.nan_mask, self.itovs, self.vtois = self.interpret_columns(X)

        if Y is not None:
            Y, _, self.target_nan_mask, self.target_itov, self.target_vtoi = self.interpret_columns(
                Y, assert_single_column=True)
            # For tabular classification we also expect the targets to comply with Sklearn.
            # check_array performs input data checks and makes sure that a numpy array
            # is returned, as both Pytorch and Sklearn deal directly with numpy/list objects.
            # interpret_columns() returns a pandas object (needed to extract the data types),
            # and check_array translates it to an np.array. Once Sklearn supports pandas,
            # the call below will simply return a pandas DataFrame.
            Y = check_array(Y, ensure_2d=False)

        self.categorical_columns, self.numerical_columns, self.categories, self.num_features, self.num_classes = \
            self.infer_dataset_properties(X, Y)

        # Allow support for X_test, Y_test. They will NOT be used for optimization, but
        # rather to have a performance through time on the test data
        if X_test is not None:
            X_test, self._test_data_types, _, _, _ = self.interpret_columns(X_test)

            # Some quality checks on the data
            if self.data_types != self._test_data_types:
                # Both literals need the f prefix: implicit concatenation does not
                # propagate it, so the test types would otherwise be printed verbatim.
                raise ValueError(f"The train data inferred types {self.data_types} are "
                                 f"different than the test inferred types {self._test_data_types}")
            if Y_test is not None:
                Y_test, _, _, _, _ = self.interpret_columns(
                    Y_test, assert_single_column=True)
                Y_test = check_array(Y_test, ensure_2d=False)

        super().__init__(train_tensors=(X, Y), test_tensors=(X_test, Y_test), shuffle=True)
        self.task_type = TABULAR_CLASSIFICATION
        self.cross_validators = get_cross_validators(
            CrossValTypes.stratified_k_fold_cross_validation,
            CrossValTypes.k_fold_cross_validation,
            CrossValTypes.shuffle_split_cross_validation,
            CrossValTypes.stratified_shuffle_split_cross_validation
        )
        self.holdout_validators = get_holdout_validators(
            HoldoutValTypes.holdout_validation,
            HoldoutValTypes.stratified_holdout_validation
        )
    def __init__(self,
                 target_variables: Tuple[int],
                 sequence_length: int,
                 n_steps: int,
                 train: TIME_SERIES_FORECASTING_INPUT,
                 val: Optional[TIME_SERIES_FORECASTING_INPUT] = None):
        """
        Set up a time-series forecasting dataset.

        :param target_variables: The indices of the variables to forecast
        :param sequence_length: The amount of past data used to forecast future values
        :param n_steps: The number of steps to forecast into the future
        :param train: Tuple with one tensor holding the training data
        :param val: Tuple with one tensor holding the validation data
        """
        # NOTE(review): ``Tuple[int]`` annotates a one-element tuple; since several
        # indices appear to be allowed, ``Tuple[int, ...]`` may be intended - confirm.
        _check_time_series_forecasting_inputs(
            target_variables=target_variables,
            sequence_length=sequence_length,
            n_steps=n_steps,
            train=train,
            val=val)

        # Train and validation data are prepared with the same settings.
        window_kwargs = dict(
            target_variables=target_variables,
            sequence_length=sequence_length,
            n_steps=n_steps)
        train = _prepare_time_series_forecasting_tensor(tensor=train, **window_kwargs)
        if val is not None:
            val = _prepare_time_series_forecasting_tensor(tensor=val, **window_kwargs)

        # Shuffling is disabled for forecasting data.
        super().__init__(train_tensors=train, val_tensors=val, shuffle=False)

        cross_val_types = (CrossValTypes.time_series_cross_validation,)
        self.cross_validators = get_cross_validators(*cross_val_types)

        holdout_types = (HoldoutValTypes.holdout_validation,)
        self.holdout_validators = get_holdout_validators(*holdout_types)
# Example #5
# 0
    def __init__(self,
                 train: IMAGE_DATASET_INPUT,
                 val: Optional[IMAGE_DATASET_INPUT] = None,
                 test: Optional[IMAGE_DATASET_INPUT] = None):
        """
        Set up an image dataset from train and optional validation/test inputs.

        :param train: The training data
        :param val: Optional validation data
        :param test: Optional test data
        """
        # Only the train and validation inputs are passed to the input check.
        _check_image_inputs(train=train, val=val)

        # Convert every provided split; absent splits stay None.
        train = _create_image_dataset(data=train)
        val = _create_image_dataset(data=val) if val is not None else None
        test = _create_image_dataset(data=test) if test is not None else None

        # Mean and std are computed from the training split only.
        train_stats = _calc_mean_std(train=train)
        self.mean, self.std = train_stats

        super().__init__(
            train_tensors=train,
            val_tensors=val,
            test_tensors=test,
            shuffle=True,
        )

        cross_val_types = (
            CrossValTypes.stratified_k_fold_cross_validation,
            CrossValTypes.k_fold_cross_validation,
            CrossValTypes.shuffle_split_cross_validation,
            CrossValTypes.stratified_shuffle_split_cross_validation,
        )
        self.cross_validators = get_cross_validators(*cross_val_types)

        holdout_types = (
            HoldoutValTypes.holdout_validation,
            HoldoutValTypes.stratified_holdout_validation,
        )
        self.holdout_validators = get_holdout_validators(*holdout_types)