def get_log_enrichment(
    input_lib: DataFrame,
    output_lib: DataFrame,
    input_stopcodon: DataFrame,
    output_stopcodon: DataFrame,
    min_counts: int,
    stopcodon: bool,
    infinite: float,
) -> Tuple[npt.NDArray, npt.NDArray]:
    """
    Calculate log10 enrichment scores from input and output counts.
    """
    # Copy the data and replace low counts with np.nan
    input_lib_np: npt.NDArray = np.copy(input_lib.astype(float))
    output_lib_np: npt.NDArray = np.copy(output_lib.astype(float))
    input_lib_np[input_lib_np < min_counts] = np.nan

    # Stop codon correction
    if stopcodon:
        output_lib_np = stopcodon_correction(
            input_lib_np, output_lib_np, np.array(input_stopcodon), np.array(output_stopcodon)
        )

    # log10 of the output/input ratio, then replace infinite values;
    # the division may hit zero, so suppress the warning.
    with np.errstate(divide='ignore'):
        counts_log10_ratio: npt.NDArray = replace_inf(
            np.log10(output_lib_np / input_lib_np), infinite
        )

    return counts_log10_ratio, output_lib_np
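
The helpers `stopcodon_correction` and `replace_inf` used above are defined elsewhere in the source module. A minimal sketch of `replace_inf` consistent with how it is called here, assuming it maps infinities to the finite sentinel `infinite`:

import numpy as np
import numpy.typing as npt

def replace_inf(values: npt.NDArray, infinite: float) -> npt.NDArray:
    # Map +inf/-inf (e.g. log10 of a zero ratio) to +/-infinite.
    values = np.copy(values)
    values[values == np.inf] = infinite
    values[values == -np.inf] = -infinite
    return values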
Example no. 2
def writeTrainTestData(name: str, train: DataFrame, test: DataFrame):
    # Mode 'w' both creates missing files and truncates existing ones,
    # so switching between 'w' and 'x' on os.path.isfile is unnecessary.
    train = train.astype({'102': int})
    with open(f'{name}.data', 'w') as train_file:
        train.to_csv(train_file, header=False, index=False)

    test = test.astype({'102': int})
    with open(f'{name}.test', 'w') as test_file:
        test.to_csv(test_file, header=False, index=False)
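
A brief usage sketch, assuming a 103-column DataFrame whose last column is labelled '102' (as the astype cast above implies); the dataset name is hypothetical:

import pandas as pd

df = pd.DataFrame([[0.1] * 102 + [1], [0.2] * 102 + [0]],
                  columns=[str(i) for i in range(103)])
writeTrainTestData('example', df.iloc[:1], df.iloc[1:])
# -> writes example.data and example.test without headers or index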
Example no. 3
def isochrone(network: DataFrame,
              start_vertices: array,
              distance_limits: array,
              only_minimum_cover=True) -> array:
    """
    Calculate the isochrone of a network.

    Parameters
    ----------
    network : Pandas DataFrame
        The network graph to be used.
    start_vertices : array(int64)
        The starting vertices.
    distance_limits : array(double)
        The distance limits.
    only_minimum_cover : bool (optional, default: True)
        If True, only the minimum cover is returned.

    Returns
    -------
    result : array
        The isochrone paths.
    """
    # astype returns a new DataFrame rather than casting in place,
    # so the result must be assigned back to network.
    network = network.astype({
        "id": int64,
        "source": int64,
        "target": int64,
        "cost": double,
        "reverse_cost": double,
        "length": double,
    })
    start_vertices = array(start_vertices).astype(int64)
    distance_limits = array(distance_limits).astype(double)
    isochroneclass = isochrone_cpp.Isochrone()
    # TODO: Find a way to bypass the type conversion.
    result = isochroneclass.calculate(
        network["id"],
        network["source"],
        network["target"],
        network["cost"],
        network["reverse_cost"],
        network["length"],
        network["geom"],
        start_vertices,
        distance_limits,
        only_minimum_cover,
    )

    return result
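
A hedged usage sketch; `isochrone_cpp` is a compiled extension, so the toy frame below only illustrates the input schema the wrapper expects:

from pandas import DataFrame

network = DataFrame({
    "id": [1, 2],
    "source": [1, 2],
    "target": [2, 3],
    "cost": [1.0, 2.0],
    "reverse_cost": [1.0, 2.0],
    "length": [100.0, 250.0],
    "geom": [None, None],  # geometry payload consumed by the extension
})
paths = isochrone(network, start_vertices=[1], distance_limits=[150.0])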
Example no. 4
    def test_read_write_dta12(self):
        original = DataFrame([(1, 2, 3, 4, 5, 6)],
                             columns=[
                                 'astringwithmorethan32characters_1',
                                 'astringwithmorethan32characters_2', '+', '-',
                                 'short', 'delete'
                             ])
        formatted = DataFrame([(1, 2, 3, 4, 5, 6)],
                              columns=[
                                  'astringwithmorethan32characters_',
                                  '_0astringwithmorethan32character', '_',
                                  '_1_', '_short', '_delete'
                              ])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None)
                tm.assert_equal(len(w),
                                1)  # should get a warning for that format.

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  formatted)
Example no. 5
    def test_read_write_dta12(self):
        original = DataFrame([(1, 2, 3, 4, 5, 6)],
                             columns=['astringwithmorethan32characters_1',
                                      'astringwithmorethan32characters_2',
                                      '+',
                                      '-',
                                      'short',
                                      'delete'])
        formatted = DataFrame([(1, 2, 3, 4, 5, 6)],
                              columns=['astringwithmorethan32characters_',
                                       '_0astringwithmorethan32character',
                                       '_',
                                       '_1_',
                                       '_short',
                                       '_delete'])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None)
                tm.assert_equal(len(w), 1)  # should get a warning for that format.

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted)
Example no. 6
    def test_read_write_dta12(self):
        original = DataFrame(
            [(1, 2, 3, 4, 5, 6)],
            columns=[
                "astringwithmorethan32characters_1",
                "astringwithmorethan32characters_2",
                "+",
                "-",
                "short",
                "delete",
            ],
        )
        formatted = DataFrame(
            [(1, 2, 3, 4, 5, 6)],
            columns=[
                "astringwithmorethan32characters_",
                "_0astringwithmorethan32character",
                "_",
                "_1_",
                "_short",
                "_delete",
            ],
        )
        formatted.index.name = "index"
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None)
                tm.assert_equal(len(w), 1)  # should get a warning for that format.

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted)
Example no. 7
    def test_read_write_dta11(self):
        # skip_if_not_little_endian()

        original = DataFrame([(1, 2, 3, 4)],
                             columns=[
                                 'good',
                                 compat.u('b\u00E4d'), '8number',
                                 'astringwithmorethan32characters______'
                             ])
        formatted = DataFrame([(1, 2, 3, 4)],
                              columns=[
                                  'good', 'b_d', '_8number',
                                  'astringwithmorethan32characters_'
                              ])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None, False)
                np.testing.assert_equal(
                    len(w), 1)  # should get a warning for that format.

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'),
                                  formatted)
Example no. 8
def get_dataset(data):

    dataset = DataFrame(data)
    # dataframe = pd.read_csv(pathname, usecols=[1], engine='python', skipfooter=3)
    # dataset = dataframe.values
    # convert int to float
    dataset = dataset.astype('float32')

    # normalize
    dataset = __scaler.fit_transform(dataset)

    train_size = int(len(dataset) * 0.65)
    train_list = dataset[:train_size]
    test_list = dataset[train_size:]

    # the training set is small, so look_back must not be too large
    # 3, 5, 7, 9
    look_back = 7
    train_x, train_y = create_dataset(train_list, look_back)
    test_x, test_y = create_dataset(test_list, look_back)

    train_x = numpy.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    test_x = numpy.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))

    return dataset, train_x, train_y, test_x, test_y
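
`create_dataset` is referenced but not shown; a common sliding-window implementation consistent with the reshape above (an assumption about the original helper):

import numpy

def create_dataset(dataset, look_back=1):
    # Build (window, next-value) pairs: x has shape (n, look_back), y shape (n,).
    data_x, data_y = [], []
    for i in range(len(dataset) - look_back):
        data_x.append(dataset[i:i + look_back, 0])
        data_y.append(dataset[i + look_back, 0])
    return numpy.array(data_x), numpy.array(data_y)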
Example no. 9
    def _finalize_output(self, frame: DataFrame) -> DataFrame:
        """
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        """
        num_cols = len(frame.columns)
        multi_index_named = True
        if self.header is None:
            if self.names is None:
                if self.prefix is not None:
                    self.names = [f"{self.prefix}{i}" for i in range(num_cols)]
                elif self.header is None:
                    self.names = range(num_cols)
            if len(self.names) != num_cols:
                # usecols is passed through to pyarrow, we only handle index col here
                # The only way self.names is not the same length as number of cols is
                # if we have int index_col. We should just pad the names(they will get
                # removed anyways) to expected length then.
                self.names = list(
                    range(num_cols - len(self.names))) + self.names
                multi_index_named = False
            frame.columns = self.names
        # we only need the frame not the names
        # error: Incompatible types in assignment (expression has type
        # "Union[List[Union[Union[str, int, float, bool], Union[Period, Timestamp,
        # Timedelta, Any]]], Index]", variable has type "Index")  [assignment]
        frame.columns, frame = self._do_date_conversions(  # type: ignore[assignment]
            frame.columns, frame)
        if self.index_col is not None:
            for i, item in enumerate(self.index_col):
                if is_integer(item):
                    self.index_col[i] = frame.columns[item]
                else:
                    # String case
                    if item not in frame.columns:
                        raise ValueError(f"Index {item} invalid")
            frame.set_index(self.index_col, drop=True, inplace=True)
            # Clear names if headerless and no name given
            if self.header is None and not multi_index_named:
                frame.index.names = [None] * len(frame.index.names)

        if self.kwds.get("dtype") is not None:
            try:
                frame = frame.astype(self.kwds.get("dtype"))
            except TypeError as e:
                # GH#44901 reraise to keep api consistent
                raise ValueError(e)
        return frame
Example no. 10
def _preprocess_temporal_columns(df: DataFrame) -> DataFrame:
    """Pre-process the columns with temporal dtype to convert
    from numpy datetime objects to pandas datetime objects.
    Casting the dtype of the columns to object type results
    in columns of dtype "object" with the contents of the
    columns being pandas datetime objects, rather than numpy
    datetime objects.

    Args:
        df:
            A DataFrame with temporal columns with numpy
            datetime dtypes.

    Returns:
        A DataFrame without numpy datetime dtypes. The
        content of the columns with temporal dtypes are
        accessible as pandas datetime objects.
    """
    for col in df.select_dtypes(include=["datetime64[ns, UTC]"]):
        df = df.astype({col: "O"})
    for col in df.select_dtypes(include="timedelta64[ns]"):
        df = df.astype({col: "O"})
    return df
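
A small usage sketch: after preprocessing, the temporal columns report dtype "object" while their contents remain pandas Timestamp/Timedelta objects:

import pandas as pd

df = pd.DataFrame({
    "when": pd.to_datetime(["2021-01-01", "2021-01-02"], utc=True),
    "gap": pd.to_timedelta(["1h", "2h"]),
})
out = _preprocess_temporal_columns(df)
print(out.dtypes)            # both columns now show dtype object
print(type(out["when"][0]))  # pandas Timestamp, not numpy datetime64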
Example no. 11
    def _finalize_output(self, frame: DataFrame) -> DataFrame:
        """
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        """
        num_cols = len(frame.columns)
        multi_index_named = True
        if self.header is None:
            if self.names is None:
                if self.prefix is not None:
                    self.names = [f"{self.prefix}{i}" for i in range(num_cols)]
                elif self.header is None:
                    self.names = range(num_cols)
            if len(self.names) != num_cols:
                # usecols is passed through to pyarrow, we only handle index col here
                # The only way self.names is not the same length as number of cols is
                # if we have int index_col. We should just pad the names(they will get
                # removed anyways) to expected length then.
                self.names = list(
                    range(num_cols - len(self.names))) + self.names
                multi_index_named = False
            frame.columns = self.names
        # we only need the frame not the names
        frame.columns, frame = self._do_date_conversions(frame.columns, frame)
        if self.index_col is not None:
            for i, item in enumerate(self.index_col):
                if is_integer(item):
                    self.index_col[i] = frame.columns[item]
                else:
                    # String case
                    if item not in frame.columns:
                        raise ValueError(f"Index {item} invalid")
            frame.set_index(self.index_col, drop=True, inplace=True)
            # Clear names if headerless and no name given
            if self.header is None and not multi_index_named:
                frame.index.names = [None] * len(frame.index.names)

        if self.kwds.get("dtype") is not None:
            frame = frame.astype(self.kwds.get("dtype"))
        return frame
Example no. 12
    def test_read_write_dta11(self):
        original = DataFrame([(1, 2, 3, 4)],
                             columns=['good', compat.u('b\u00E4d'), '8number', 'astringwithmorethan32characters______'])
        formatted = DataFrame([(1, 2, 3, 4)],
                              columns=['good', 'b_d', '_8number', 'astringwithmorethan32characters_'])
        formatted.index.name = 'index'
        formatted = formatted.astype(np.int32)

        with tm.ensure_clean() as path:
            with warnings.catch_warnings(record=True) as w:
                original.to_stata(path, None)
                tm.assert_equal(len(w), 1)  # should get a warning for that format.

            written_and_read_again = self.read_dta(path)
            tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted)
Example no. 13
class PandasBackend(DataBackend):
    _data: DataFrame
    _index: PandasIndex
    _loc: _LocIndexer
    _iloc: _ILocIndexer

    def __init__(
        self,
        data: Optional[Union[Series, DataFrame, dict[str, list]]] = None,
        index: Optional[PandasIndex] = None,
    ) -> None:
        if data is None:
            self._data = DataFrame(dtype="object")
        elif type(data) is Series:
            self._data = cast(Series, data).to_frame().transpose()
        elif type(data) is DataFrame:
            self._data = DataFrame(data)
        elif type(data) is dict:
            sample_value = next(iter(data.values()))
            if not isinstance(sample_value, Iterable) or isinstance(
                    sample_value, str):
                self._data = Series(data).to_frame().transpose()
            else:
                self._data = DataFrame(data)
        else:
            raise ValueError(
                f"Received unexpected value type {type(data)}: {data}")

        if index is None:
            self._data.index.name = "index"
            self._index = PandasIndex(self._data.index, [])
        else:
            if not isinstance(index, PandasIndex):
                index = PandasIndex(index)
            self._data.index = index._data
            self._index = index
        self._loc = _LocIndexer(self)
        self._iloc = _ILocIndexer(self)

    def is_link(self) -> bool:
        return False

    def link_token(self) -> Optional[DataToken]:
        return None

    def to_pandas(self) -> DataFrame:
        return self._data

    @property
    def columns(self) -> list[str]:
        return self._data.columns.tolist()

    @property
    def values(self) -> np.ndarray:
        data_values = self._data.values
        shape = data_values.shape
        if shape[1] == 1:
            return np.squeeze(data_values, axis=1)
        elif shape[0] == 1:
            return np.squeeze(data_values, axis=0)
        else:
            return data_values

    @property
    def dtypes(self) -> dict[str, DataType]:
        return {
            col: DataType(dtype)
            for col, dtype in self._data.dtypes.items()
        }

    def cast_columns(self, column_dtypes: dict[str, type]) -> PandasBackend:
        return PandasBackend(self._data.astype(column_dtypes, errors="ignore"))

    def to_dict(self) -> dict[str, Any]:
        return self._data.to_dict("list")

    @property
    def index(self) -> Index:
        return self._index

    @property
    def index_name(self) -> Union[str, list[str]]:
        return self._data.index.name

    @property
    def loc(self: PandasBackend) -> LocIndexer[PandasBackend]:
        return self._loc

    @property
    def iloc(self: PandasBackend) -> ILocIndexer[PandasBackend]:
        return self._iloc

    def equals(self, other: PandasBackend) -> bool:
        if type(other) is not PandasBackend:
            return False
        return np.array_equal(self._data.values,
                              other._data.values) and self._index.equals(
                                  other._index)

    def __eq__(self, other) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data == other

    def __ne__(self, other: Any) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data != other

    def __gt__(self, other: Any) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data > other

    def __ge__(self, other: Any) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data >= other

    def __lt__(self, other: Any) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data < other

    def __le__(self, other: Any) -> DataFrame:
        if issubclass(type(other), PandasBackend):
            other = other._data
        return self._data <= other

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self) -> Generator[str, None, None]:
        return iter(self._data)

    def iterrows(self) -> Generator[tuple[int, PandasBackend], None, None]:
        for i, row in self._data.iterrows():
            yield (i, PandasBackend(row.to_frame().transpose()))

    def itertuples(self, ignore_index: bool = False):
        for values in self._data.itertuples(index=not ignore_index):
            yield values

    def __getitem__(self, item: str) -> Any:
        return PandasBackend(self._data[item].to_frame())

    def getitems(self, items: list[str]) -> PandasBackend:
        return PandasBackend(self._data[items])

    def getmask(self, mask: list[bool]) -> PandasBackend:
        return PandasBackend(self._data[mask])

    def query(self, query: "Query") -> PandasBackend:
        from tanuki.database.adapter.query.pandas_query_compiler import PandasQueryCompiler

        query_compiler = PandasQueryCompiler(self._data)
        query = query_compiler.compile(query)
        return PandasBackend(self._data[query])

    def __setitem__(self, items: str, value: Any) -> None:
        if isinstance(value, PandasBackend):
            value = value._data
        self._data[items] = value

    def get_index(self, index_alias: IndexAlias) -> Index:
        cols = [str(col) for col in index_alias.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index_alias.name
        return PandasIndex(new_data.index, cols)

    def set_index(self, index: Union[Index, IndexAlias]) -> PandasBackend:
        cols = [str(col) for col in index.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index.name
        new_index = PandasIndex(new_data.index, cols)
        return PandasBackend(new_data, new_index)

    def reset_index(self: PandasBackend) -> PandasBackend:
        new_data = self._data.reset_index(drop=True)
        new_data.index.name = "index"
        new_index = PandasIndex(new_data.index, [])
        return PandasBackend(new_data, new_index)

    def append(
        self: PandasBackend,
        new_backend: PandasBackend,
        ignore_index: bool = False,
    ) -> PandasBackend:
        return PandasBackend(
            self._data.append(new_backend._data, ignore_index=ignore_index))

    def drop_indices(self: PandasBackend, indices: list[int]) -> PandasBackend:
        return PandasBackend(self._data.drop(indices))

    @classmethod
    def concat(
        cls: type[PandasBackend],
        all_backends: list[PandasBackend],
        ignore_index: bool = False,
    ) -> PandasBackend:
        all_data = [backend._data for backend in all_backends]
        return PandasBackend(pd.concat(all_data, ignore_index=ignore_index))

    def nunique(self) -> int:
        return self._data.nunique()

    def __str__(self) -> str:
        return str(self._data)

    def __repr__(self) -> str:
        return str(self)
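
A brief usage sketch of the backend's astype wrapper `cast_columns`; `PandasIndex` and the indexer classes are assumed to come from the same package:

backend = PandasBackend({"a": [1, 2, 3], "b": ["4", "5", "6"]})
casted = backend.cast_columns({"b": int})
print(casted.dtypes)  # column "b" now wraps an int64 dtype

Note that `errors="ignore"` in `cast_columns` silently keeps a column's original dtype whenever the conversion fails.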
Example no. 14
def from_dummies(
    data: DataFrame,
    sep: None | str = None,
    default_category: None | Hashable | dict[str, Hashable] = None,
) -> DataFrame:
    """
    Create a categorical ``DataFrame`` from a ``DataFrame`` of dummy variables.

    Inverts the operation performed by :func:`~pandas.get_dummies`.

    .. versionadded:: 1.5.0

    Parameters
    ----------
    data : DataFrame
        Data which contains dummy-coded variables in form of integer columns of
        1's and 0's.
    sep : str, default None
        Separator used in the dummy column names: the character that separates
        the variable prefix from the category name. For example, if your column
        names are 'prefix_A' and 'prefix_B', you can strip the underscore by
        specifying sep='_'.
    default_category : None, Hashable or dict of Hashables, default None
        The default category is the implied category when a value has none of the
        listed categories specified with a one, i.e. if all dummies in a row are
        zero. Can be a single value for all variables or a dict mapping each
        variable prefix to its default category.

    Returns
    -------
    DataFrame
        Categorical data decoded from the dummy input-data.

    Raises
    ------
    ValueError
        * When the input ``DataFrame`` ``data`` contains NA values.
        * When the input ``DataFrame`` ``data`` contains column names with separators
          that do not match the separator specified with ``sep``.
        * When a ``dict`` passed to ``default_category`` does not include an implied
          category for each prefix.
        * When a value in ``data`` has more than one category assigned to it.
        * When ``default_category=None`` and a value in ``data`` has no category
          assigned to it.
    TypeError
        * When the input ``data`` is not of type ``DataFrame``.
        * When the input ``DataFrame`` ``data`` contains non-dummy data.
        * When the passed ``sep`` is of a wrong data type.
        * When the passed ``default_category`` is of a wrong data type.

    See Also
    --------
    :func:`~pandas.get_dummies` : Convert ``Series`` or ``DataFrame`` to dummy codes.
    :class:`~pandas.Categorical` : Represent a categorical variable in classic
        R / S-plus fashion.

    Notes
    -----
    The columns of the passed dummy data should only include 1's and 0's,
    or boolean values.

    Examples
    --------
    >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0],
    ...                    "c": [0, 0, 1, 0]})

    >>> df
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0

    >>> pd.from_dummies(df)
    0     a
    1     b
    2     c
    3     a

    >>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0],
    ...                    "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
    ...                    "col2_c": [0, 0, 1]})

    >>> df
          col1_a  col1_b  col2_a  col2_b  col2_c
    0       1       0       0       1       0
    1       0       1       1       0       0
    2       1       0       0       0       1

    >>> pd.from_dummies(df, sep="_")
        col1    col2
    0    a       b
    1    b       a
    2    a       c

    >>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0],
    ...                    "col2_a": [0, 1, 0], "col2_b": [1, 0, 0],
    ...                    "col2_c": [0, 0, 0]})

    >>> df
          col1_a  col1_b  col2_a  col2_b  col2_c
    0       1       0       0       1       0
    1       0       1       1       0       0
    2       0       0       0       0       0

    >>> pd.from_dummies(df, sep="_", default_category={"col1": "d", "col2": "e"})
        col1    col2
    0    a       b
    1    b       a
    2    d       e
    """
    from pandas.core.reshape.concat import concat

    if not isinstance(data, DataFrame):
        raise TypeError("Expected 'data' to be a 'DataFrame'; "
                        f"Received 'data' of type: {type(data).__name__}")

    if data.isna().any().any():
        raise ValueError("Dummy DataFrame contains NA value in column: "
                         f"'{data.isna().any().idxmax()}'")

    # index data with a list of all columns that are dummies
    try:
        data_to_decode = data.astype("boolean", copy=False)
    except TypeError:
        raise TypeError("Passed DataFrame contains non-dummy data")

    # collect prefixes and get lists to slice data for each prefix
    variables_slice = defaultdict(list)
    if sep is None:
        variables_slice[""] = list(data.columns)
    elif isinstance(sep, str):
        for col in data_to_decode.columns:
            prefix = col.split(sep)[0]
            if len(prefix) == len(col):
                raise ValueError(f"Separator not specified for column: {col}")
            variables_slice[prefix].append(col)
    else:
        raise TypeError("Expected 'sep' to be of type 'str' or 'None'; "
                        f"Received 'sep' of type: {type(sep).__name__}")

    if default_category is not None:
        if isinstance(default_category, dict):
            if not len(default_category) == len(variables_slice):
                len_msg = (
                    f"Length of 'default_category' ({len(default_category)}) "
                    f"did not match the length of the columns being encoded "
                    f"({len(variables_slice)})")
                raise ValueError(len_msg)
        elif isinstance(default_category, Hashable):
            default_category = dict(
                zip(variables_slice,
                    [default_category] * len(variables_slice)))
        else:
            raise TypeError("Expected 'default_category' to be of type "
                            "'None', 'Hashable', or 'dict'; "
                            "Received 'default_category' of type: "
                            f"{type(default_category).__name__}")

    cat_data = {}
    for prefix, prefix_slice in variables_slice.items():
        if sep is None:
            cats = prefix_slice.copy()
        else:
            cats = [col[len(prefix + sep):] for col in prefix_slice]
        assigned = data_to_decode.loc[:, prefix_slice].sum(axis=1)
        if any(assigned > 1):
            raise ValueError("Dummy DataFrame contains multi-assignment(s); "
                             f"First instance in row: {assigned.idxmax()}")
        elif any(assigned == 0):
            if isinstance(default_category, dict):
                cats.append(default_category[prefix])
            else:
                raise ValueError(
                    "Dummy DataFrame contains unassigned value(s); "
                    f"First instance in row: {assigned.idxmin()}")
            data_slice = concat(
                (data_to_decode.loc[:, prefix_slice], assigned == 0), axis=1)
        else:
            data_slice = data_to_decode.loc[:, prefix_slice]
        cats_array = np.array(cats, dtype="object")
        # get indices of True entries along axis=1
        cat_data[prefix] = cats_array[data_slice.to_numpy().nonzero()[1]]

    return DataFrame(cat_data)
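
A round-trip sketch (pandas >= 1.5): `from_dummies` inverts `get_dummies` when the same separator is used:

import pandas as pd

original = pd.DataFrame({"col": ["a", "b", "a", "c"]})
dummies = pd.get_dummies(original, prefix_sep="_")
restored = pd.from_dummies(dummies, sep="_")
assert restored.equals(original)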