Example #1
0
def test_from_3d_numpy_to_nested():
    """Check that a 3d numpy array is converted into a nested DataFrame.

    A random normal array of shape (5, 12, 2) is passed through
    ``from_3d_numpy_to_nested`` and the result must be accepted by
    ``is_nested_dataframe``.
    """
    shape = (5, 12, 2)
    converted = from_3d_numpy_to_nested(np.random.normal(size=shape))
    assert is_nested_dataframe(converted)
Example #2
0
def _assert_almost_equal(x, y, decimal=6, err_msg="", verbose=True):
    """Assert that x and y are almost equal, with support for nested frames.

    If ``is_nested_dataframe(x)`` is true, the comparison is made cell by
    cell, because tabularizing the data first does not work for series of
    unequal length. Otherwise both inputs are handed to
    ``np.testing.assert_array_almost_equal`` as they are.

    Parameters
    ----------
    x, y : objects to compare
    decimal, err_msg, verbose
        Forwarded unchanged to ``np.testing.assert_array_almost_equal``.

    Raises
    ------
    ValueError
        If x is nested and the shapes of x and y differ.
    AssertionError
        If the compared values are not almost equal.
    """

    def _check(actual, desired):
        # single place where the keyword arguments are forwarded
        np.testing.assert_array_almost_equal(
            actual, desired, decimal=decimal, err_msg=err_msg, verbose=verbose
        )

    # simple case: anything that is not a nested frame is compared directly
    if not is_nested_dataframe(x):
        _check(x, y)
        return

    # cell-wise comparison only makes sense for equally shaped frames
    if x.shape != y.shape:
        raise ValueError("Found inputs with different shapes")

    # walk over the columns, then over the cells of each column
    for col in range(x.shape[1]):
        x_cells = x.iloc[:, col].tolist()
        y_cells = y.iloc[:, col].tolist()
        for x_cell, y_cell in zip(x_cells, y_cells):
            _check(x_cell, y_cell)
Example #3
0
def _compare_nested_frame(func, x, y, **kwargs):
    """Helper function to compare two (possibly nested) pd.DataFrames.

    Parameters
    ----------
    func : callable
        Comparison function called as ``func(a, b, **kwargs)``; it is
        expected to raise if ``a`` and ``b`` are not equal.
    x : pd.DataFrame
    y : pd.DataFrame
    kwargs : dict
        Keyword arguments forwarded to ``func``.

    Raises
    ------
    AssertionError
        If x is not a pd.DataFrame, or if x is empty and differs from y.
    ValueError
        If x is nested and the shapes of x and y differ.
    """
    # we iterate over columns and rows to make cell-wise comparisons,
    # tabularizing the data first would simplify this, but does not
    # work for unequal length data

    # in some cases, x and y may be empty (e.g. TSFreshRelevantFeatureExtractor) and
    # we cannot compare individual cells, so we simply check if they are equal
    assert isinstance(x, pd.DataFrame)
    if x.empty:
        assert_frame_equal(x, y)

    elif is_nested_dataframe(x):
        # make sure both inputs have the same shape
        if not x.shape == y.shape:
            raise ValueError("Found inputs with different shapes")

        # iterate over columns, checking individual cells
        for i in range(x.shape[1]):
            xc = x.iloc[:, i].tolist()
            yc = y.iloc[:, i].tolist()

            # iterate over rows, checking if cells are equal
            for xci, yci in zip(xc, yc):
                func(xci, yci, **kwargs)

    else:
        # non-empty frame that is not nested: without this branch the helper
        # would return without comparing anything, so compare the frames whole
        func(x, y, **kwargs)
def check_panel_to_panel_transform_univariate(Estimator):
    """Check the output container of a panel-to-panel transform.

    The estimator is run through ``_construct_fit_transform`` with 5
    instances (the number of columns is not overridden). The output must be
    a pd.DataFrame or np.ndarray whose first dimension equals the number of
    instances; a np.ndarray must be 3-dimensional and a pd.DataFrame must be
    nested.
    """
    expected_instances = 5
    transformed = _construct_fit_transform(Estimator, n_instances=expected_instances)

    # container type and number of instances
    assert isinstance(transformed, (pd.DataFrame, np.ndarray))
    assert transformed.shape[0] == expected_instances

    # container-specific layout
    if isinstance(transformed, pd.DataFrame):
        assert is_nested_dataframe(transformed)
    else:
        assert transformed.ndim == 3
Example #5
0
def test_from_3d_numpy_to_nested(n_instances, n_columns, n_timepoints):
    """Check shape, type and values of ``from_3d_numpy_to_nested`` output.

    A random normal array of shape (n_instances, n_columns, n_timepoints)
    is converted; the result must be a nested DataFrame of shape
    (n_instances, n_columns) whose cells hold n_timepoints values, and the
    cell at position (1, 0) must equal the matching slice of the input.
    """
    shape = (n_instances, n_columns, n_timepoints)
    array = np.random.normal(size=shape)
    nested = from_3d_numpy_to_nested(array)

    # container type and outer shape
    assert is_nested_dataframe(nested)
    assert nested.shape == shape[:2]

    # length of the series stored in the first cell
    first_cell = nested.iloc[0, 0]
    assert first_cell.shape[0] == n_timepoints

    # values of one series must survive the conversion unchanged
    expected_series = array[1, 0, :]
    np.testing.assert_array_equal(nested.iloc[1, 0], expected_series)
def check_panel_to_panel_transform_multivariate(Estimator):
    """Check a panel-to-panel transform on input with 3 columns.

    Estimators carrying the "univariate-only" tag are handed to
    ``_check_raises_error``. All other estimators are run through
    ``_construct_fit_transform`` with 5 instances and 3 columns; the output
    must be a pd.DataFrame or np.ndarray whose first dimension equals the
    number of instances, 3-dimensional if a np.ndarray and nested if a
    pd.DataFrame.
    """
    expected_instances = 5
    columns = 3

    # univariate-only estimators take the error-checking path instead
    if _has_tag(Estimator, "univariate-only"):
        _check_raises_error(
            Estimator, n_instances=expected_instances, n_columns=columns
        )
        return

    transformed = _construct_fit_transform(
        Estimator, n_instances=expected_instances, n_columns=columns
    )

    # container type and number of instances
    assert isinstance(transformed, (pd.DataFrame, np.ndarray))
    assert transformed.shape[0] == expected_instances

    # container-specific layout
    if isinstance(transformed, pd.DataFrame):
        assert is_nested_dataframe(transformed)
    else:
        assert transformed.ndim == 3
def main():
    """Classify the ItalyPowerDemand data with a 1-nearest-neighbour model.

    Loads the TRAIN and TEST ARFF files from the working directory, splits
    off the 'target' column, detabularizes the remaining columns, fits a
    ``KNeighborsTimeSeriesClassifier`` (n_neighbors=1, metric="dtw") on the
    training part and prints its score on the test part rounded to 4
    decimals. Intermediate frames are printed along the way.
    """
    train_path = 'ItalyPowerDemand_TRAIN.arff'
    test_path = 'ItalyPowerDemand_TEST.arff'

    # loadarff gives a 2-tuple: the records first, the attribute
    # description second; only the records are turned into a flat table
    train_raw = arff.loadarff(train_path)
    test_raw = arff.loadarff(test_path)
    train_table = pd.DataFrame(train_raw[0])
    test_table = pd.DataFrame(test_raw[0])

    # inspect the flat training table
    print(train_table.head())
    print('\n Is nested the df above?', is_nested_dataframe(train_table), '\n')

    # separate the features from the integer class labels
    X_train = train_table.drop(columns='target')
    y_train = train_table['target'].astype(int)
    print(X_train.head(), y_train.head(), '\n')
    X_test = test_table.drop(columns='target')
    y_test = test_table['target'].astype(int)

    # convert the flat feature tables with detabularize
    nested_train = detabularize(X_train)
    nested_test = detabularize(X_test)
    print(nested_train.head())
    print('Is nested the detabularized df above?',
          is_nested_dataframe(nested_train), '\n')

    # sktime's loader is printed next to the manual result for comparison
    X, y = load_from_arff_to_dataframe(train_path)
    print(nested_train.head(), X.head(), type(y_train), type(y))

    # fit the classifier and report its score on the test split
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    knn.fit(nested_train, y_train)
    test_score = knn.score(nested_test, y_test)
    print('The score of the KNN classifier is:',
          round(test_score, 4))
Example #8
0
def _compare_nested_frame(func, x, y, **kwargs):
    """Helper function to compare two nested pd.DataFrames

    Parameters
    ----------
    func : function
        Function from np.testing for comparing arrays.
    x : pd.DataFrame
    y : pd.DataFrame
    kwargs : dict
        Keyword argument for function

    Raises
    ------
    AssertionError
        If x is not a pd.DataFrame, or if x and y are not equal
    ValueError
        If x is nested and x and y have different shapes
    """
    # We iterate over columns and rows to make cell-wise comparisons.
    # Tabularizing the data first would simplify this, but does not
    # work for unequal length data.

    # In rare cases, x and y may be empty (e.g. TSFreshRelevantFeatureExtractor) and
    # we cannot compare individual cells, so we simply check if everything else is
    # equal here.
    assert isinstance(x, pd.DataFrame)
    if x.empty:
        assert_frame_equal(x, y)

    elif is_nested_dataframe(x):
        # Check if both inputs have the same shape
        if not x.shape == y.shape:
            raise ValueError("Found inputs with different shapes")

        # Iterate over columns
        for i in range(x.shape[1]):
            xc = x.iloc[:, i].tolist()
            yc = y.iloc[:, i].tolist()

            # Iterate over rows, checking if individual cells are equal
            for xci, yci in zip(xc, yc):
                func(xci, yci, **kwargs)

    else:
        # A non-empty frame that is not nested previously fell through both
        # branches and was never compared, so the check passed silently.
        # Compare the two frames as a whole instead.
        func(x, y, **kwargs)
Example #9
0
def check_X(
    X,
    enforce_univariate=False,
    enforce_min_instances=1,
    enforce_min_columns=1,
    coerce_to_numpy=False,
    coerce_to_pandas=False,
):
    """Validate input data.

    Parameters
    ----------
    X : pd.DataFrame or np.array
        Input data
    enforce_univariate : bool, optional (default=False)
        Enforce that X is univariate.
    enforce_min_instances : int, optional (default=1)
        Enforce minimum number of instances.
    enforce_min_columns : int, optional (default=1)
        Enforce minimum number of columns (or time-series variables).
    coerce_to_numpy : bool, optional (default=False)
        If True, X will be coerced to a 3-dimensional numpy array.
    coerce_to_pandas : bool, optional (default=False)
        If True, X will be coerced to a nested pandas DataFrame.

    Returns
    -------
    X : pd.DataFrame or np.array
        Checked and possibly converted input data

    Raises
    ------
    ValueError
        If X is invalid input data, or if both coercion flags are set.
    """
    # the two coercion targets are mutually exclusive
    if coerce_to_pandas and coerce_to_numpy:
        message = (
            "`coerce_to_pandas` and `coerce_to_numpy` cannot "
            "both be set to True"
        )
        raise ValueError(message)

    # only the container types listed in VALID_X_TYPES are accepted
    if not isinstance(X, VALID_X_TYPES):
        raise ValueError(
            f"X must be a pd.DataFrame or a np.array, but found: {type(X)}"
        )

    # numpy input: the dimensionality has to be verified before the second
    # axis is read below, since that axis may not exist otherwise
    if isinstance(X, np.ndarray):
        if X.ndim != 3:
            raise ValueError(
                "If passed as a np.array, X must be a 3-dimensional "
                f"array, but found shape: {X.shape}"
            )
        if coerce_to_pandas:
            X = from_3d_numpy_to_nested(X)

    # column-count constraints: lower bound first, then univariate
    n_columns = X.shape[1]
    if n_columns < enforce_min_columns:
        raise ValueError(
            f"X must contain at least: {enforce_min_columns} columns, "
            f"but found only: {n_columns}."
        )
    if enforce_univariate and n_columns > 1:
        raise ValueError(
            "X must be univariate with X.shape[1] == 1, but found: "
            f"X.shape[1] == {n_columns}."
        )

    # instance-count constraint, skipped for non-positive limits
    if enforce_min_instances > 0:
        _enforce_min_instances(X, min_instances=enforce_min_instances)

    # pandas input (including numpy input coerced above) must be nested
    if isinstance(X, pd.DataFrame):
        if not is_nested_dataframe(X):
            raise ValueError(
                "If passed as a pd.DataFrame, X must be a nested "
                "pd.DataFrame, with pd.Series or np.arrays inside cells."
            )
        if coerce_to_numpy:
            X = from_nested_to_3d_numpy(X)

    return X