Beispiel #1
0
 def setup(self, self_type, value_type, shape, limit, inplace):
     """
     Build the all-NaN dataset and the fill value used by the benchmark.

     Parameters
     ----------
     self_type : {"DataFrame", "Series"}
         Kind of object stored in ``self.dataset``.
     value_type : {"scalar", "dict", "Series", "DataFrame"}
         Kind of fill value stored in ``self.value``.
     shape : sequence of int
         ``shape[0]`` is the number of rows and ``shape[1]`` the number of
         columns (columns are only used for DataFrame objects).
     limit : number or None
         If truthy, ``self.limit`` is ``int(limit * shape[0])``;
         otherwise it is ``None``.
     inplace
         Not used by this method.

     Raises
     ------
     NotImplementedError
         For a "DataFrame" value combined with a "Series" dataset.
     ValueError
         For an unrecognized `self_type` or `value_type`.
     """
     pd = IMPL[ASV_USE_IMPL]
     nrows = shape[0]
     columns = [f"col{x}" for x in range(shape[1])]
     is_frame = self_type == "DataFrame"

     if is_frame:
         self.dataset = pd.DataFrame(np.nan,
                                     index=pd.RangeIndex(nrows),
                                     columns=columns)
     elif self_type == "Series":
         self.dataset = pd.Series(np.nan, index=pd.RangeIndex(nrows))
     else:
         raise ValueError(f"Unsupported self_type: {self_type!r}")

     if value_type == "scalar":
         self.value = 18.19
     elif value_type == "dict":
         # fillna matches dict keys against column labels for a DataFrame
         # and against index labels for a Series; keys that match nothing
         # are ignored, so the keys must follow the dataset kind.
         keys = columns if is_frame else range(nrows)
         self.value = {key: pos * 1.23 for pos, key in enumerate(keys)}
     elif value_type == "Series":
         # Same label-matching rule as for the dict value above.
         labels = columns if is_frame else pd.RangeIndex(nrows)
         self.value = pd.Series([pos * 1.23 for pos in range(len(labels))],
                                index=labels)
     elif value_type == "DataFrame":
         if not is_frame:
             # A Series dataset with a DataFrame fill value is not benchmarked.
             raise NotImplementedError
         self.value = pd.DataFrame(
             {
                 k: [i + j * 1.23 for j in range(nrows)]
                 for i, k in enumerate(columns)
             },
             index=pd.RangeIndex(nrows),
             columns=columns,
         )
     else:
         raise ValueError(f"Unsupported value_type: {value_type!r}")

     self.limit = int(limit * nrows) if limit else None
Beispiel #2
0
    def setup(self, value_type, shape, limit):
        """
        Prepare the NaN data and the keyword arguments for the benchmark.

        Parameters
        ----------
        value_type : {"scalar", "dict", "Series", "DataFrame"}
            Kind of fill value stored in ``self.value``.
        shape : sequence of int
            Unpacked into ``gen_nan_data``; ``shape[0]`` is also the row
            count of the DataFrame fill value and the scale for `limit`.
        limit : number or None
            If truthy, the stored limit is ``int(limit * shape[0])``;
            otherwise it is ``None``.
        """
        lib = IMPL[ASV_USE_IMPL]
        frame = gen_nan_data(ASV_USE_IMPL, *shape)
        self.df = frame
        labels = frame.columns
        nrows = shape[0]

        # The branches are mutually exclusive, so their order is irrelevant.
        if value_type == "DataFrame":
            per_column = {}
            for offset, label in enumerate(labels):
                per_column[label] = [
                    offset + row * 1.23 for row in range(nrows)
                ]
            self.value = lib.DataFrame(
                per_column,
                index=lib.RangeIndex(nrows),
                columns=labels,
            )
        elif value_type == "Series":
            scaled = [pos * 1.23 for pos, _ in enumerate(labels)]
            self.value = lib.Series(scaled, index=labels)
        elif value_type == "dict":
            # One entry per column label, valued by the column position.
            self.value = dict(
                zip(labels, (pos * 1.23 for pos in range(len(labels))))
            )
        elif value_type == "scalar":
            self.value = 18.19
        else:
            assert False

        max_fill = None
        if limit:
            max_fill = int(limit * nrows)
        self.kw = {"value": self.value, "limit": max_fill}
Beispiel #3
0
 def setup(self, shape, limit, inplace):
     """
     Create an all-NaN DataFrame and the row limit for the benchmark.

     Parameters
     ----------
     shape : sequence of int
         ``shape[0]`` rows by ``shape[1]`` columns named "col0", "col1", ...
     limit : number or None
         If truthy, ``self.limit`` is ``int(limit * shape[0])``;
         otherwise it is ``None``.
     inplace
         Not used by this method.
     """
     lib = IMPL[ASV_USE_IMPL]
     col_names = [f"col{pos}" for pos in range(shape[1])]
     nrows = shape[0]
     row_labels = lib.RangeIndex(nrows)
     self.df = lib.DataFrame(np.nan, index=row_labels, columns=col_names)
     if limit:
         self.limit = int(limit * nrows)
     else:
         self.limit = None
Beispiel #4
0
def gen_nan_data(impl: str, nrows: int, ncols: int):
    """
    Generate nan data with caching.

    The generated data are saved in ``data_cache`` under the key
    ``(impl, nrows, ncols)``; a subsequent call with the same arguments
    returns the very same object (no copy is made). Callers that modify
    the result must therefore copy it first.

    Parameters
    ----------
    impl : str
        Implementation used to create the DataFrame or Series;
        supported implementations: {"modin", "pandas"}.
    nrows : int
        Number of rows.
    ncols : int
        Number of columns.

    Returns
    -------
    modin.pandas.DataFrame or pandas.DataFrame or modin.pandas.Series or pandas.Series
        DataFrame with shape (nrows, ncols) when ``ncols > 1``, or Series
        with shape (nrows,) when ``ncols == 1``.

    Raises
    ------
    ValueError
        If `ncols` is less than 1.
    """
    cache_key = (impl, nrows, ncols)
    if cache_key in data_cache:
        return data_cache[cache_key]

    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Generating nan data %s rows and %s columns", nrows, ncols)

    # Take every constructor from the requested implementation rather than
    # from a module-level alias.
    lib = IMPL[impl]
    if ncols > 1:
        columns = [f"col{x}" for x in range(ncols)]
        data = lib.DataFrame(np.nan,
                             index=lib.RangeIndex(nrows),
                             columns=columns)
    elif ncols == 1:
        data = lib.Series(np.nan, index=lib.RangeIndex(nrows))
    else:
        # Explicit exception: an assert would be stripped under ``python -O``.
        raise ValueError("Number of columns (ncols) should be >= 1")

    data_cache[cache_key] = data
    return data
Beispiel #5
0
    def setup(self, value_type, shape, limit):
        """
        Prepare the NaN data and the keyword arguments for the benchmark.

        Parameters
        ----------
        value_type : {"scalar", "dict", "Series"}
            Kind of fill value stored in ``self.value``.
        shape : sequence of int
            Unpacked into ``gen_nan_data``; ``shape[0]`` is also the number
            of fill entries and the scale for `limit`.
        limit : number or None
            If truthy, the stored limit is ``int(limit * shape[0])``;
            otherwise it is ``None``.
        """
        lib = IMPL[ASV_USE_IMPL]
        self.series = gen_nan_data(ASV_USE_IMPL, *shape)
        nrows = shape[0]

        # The branches are mutually exclusive, so their order is irrelevant.
        if value_type == "Series":
            scaled = [pos * 1.23 for pos in range(nrows)]
            self.value = lib.Series(scaled, index=lib.RangeIndex(nrows))
        elif value_type == "dict":
            # Keys are the row positions, values the scaled positions.
            self.value = dict((pos, pos * 1.23) for pos in range(nrows))
        elif value_type == "scalar":
            self.value = 18.19
        else:
            assert False

        max_fill = None
        if limit:
            max_fill = int(limit * nrows)
        self.kw = {"value": self.value, "limit": max_fill}