def setup(self, self_type, value_type, shape, limit, inplace):
    """
    Prepare a NaN-filled DataFrame or Series and a fill value for fillna benchmarks.

    Parameters
    ----------
    self_type : str
        "DataFrame" or "Series" — the kind of object to be filled.
    value_type : str
        "scalar", "dict", "Series" or "DataFrame" — the kind of fill value.
    shape : tuple of int
        (nrows, ncols) of the dataset.
    limit : float or None
        Fraction of rows allowed to be filled; converted to an absolute count.
    inplace : bool
        Forwarded by ASV to the timed method; not used during setup.
    """
    pd = IMPL[ASV_USE_IMPL]
    columns = [f"col{x}" for x in range(shape[1])]
    if self_type == "DataFrame":
        self.dataset = pd.DataFrame(
            np.nan, index=pd.RangeIndex(shape[0]), columns=columns
        )
        # DataFrame.fillna maps dict keys / Series index onto COLUMN labels,
        # so the fill value must be keyed by column names. Keying by row
        # positions (as the old code did) matches no column and silently
        # fills nothing, turning the benchmark into a no-op.
        fill_keys = columns
    elif self_type == "Series":
        self.dataset = pd.Series(np.nan, index=pd.RangeIndex(shape[0]))
        # Series.fillna maps dict keys / Series index onto the row index.
        fill_keys = list(range(shape[0]))
    else:
        # `assert` is stripped under `python -O`; raise explicitly instead.
        raise ValueError(f"Unsupported self_type: {self_type}")

    if value_type == "scalar":
        self.value = 18.19
    elif value_type == "dict":
        self.value = {k: i * 1.23 for i, k in enumerate(fill_keys)}
    elif value_type == "Series":
        self.value = pd.Series(
            [i * 1.23 for i in range(len(fill_keys))], index=fill_keys
        )
    elif value_type == "DataFrame":
        if self_type == "Series":
            raise NotImplementedError
        self.value = pd.DataFrame(
            {
                k: [i + j * 1.23 for j in range(shape[0])]
                for i, k in enumerate(columns)
            },
            index=pd.RangeIndex(shape[0]),
            columns=columns,
        )
    else:
        raise ValueError(f"Unsupported value_type: {value_type}")

    self.limit = int(limit * shape[0]) if limit else None
def setup(self, value_type, shape, limit):
    """Build a NaN DataFrame plus the value/limit kwargs used by fillna timings."""
    pd = IMPL[ASV_USE_IMPL]
    self.df = gen_nan_data(ASV_USE_IMPL, *shape)
    columns = self.df.columns
    nrows = shape[0]

    if value_type == "scalar":
        self.value = 18.19
    elif value_type == "dict":
        # One distinct fill value per column, keyed by column label.
        self.value = dict(zip(columns, (pos * 1.23 for pos in range(len(columns)))))
    elif value_type == "Series":
        self.value = pd.Series(
            [pos * 1.23 for pos in range(len(columns))], index=columns
        )
    elif value_type == "DataFrame":
        frame_data = {
            col: [pos + row * 1.23 for row in range(nrows)]
            for pos, col in enumerate(columns)
        }
        self.value = pd.DataFrame(
            frame_data, index=pd.RangeIndex(nrows), columns=columns
        )
    else:
        assert False

    self.kw = {
        "value": self.value,
        "limit": int(limit * nrows) if limit else None,
    }
def setup(self, shape, limit, inplace):
    """Create an all-NaN DataFrame and convert the limit fraction to a row count."""
    pd = IMPL[ASV_USE_IMPL]
    nrows, ncols = shape
    col_names = [f"col{x}" for x in range(ncols)]
    self.df = pd.DataFrame(np.nan, index=pd.RangeIndex(nrows), columns=col_names)
    self.limit = int(limit * nrows) if limit else None
def gen_nan_data(impl: str, nrows: int, ncols: int):
    """
    Generate nan data with caching.

    The generated data are saved in the dictionary and on a subsequent call,
    if the keys match, saved data will be returned. Therefore, we need to
    carefully monitor the changing of saved data and make its copy if needed.

    Parameters
    ----------
    impl : str
        Implementation used to create the DataFrame or Series;
        supported implementations: {"modin", "pandas"}.
    nrows : int
        Number of rows.
    ncols : int
        Number of columns; must be >= 1.

    Returns
    -------
    modin.pandas.DataFrame or pandas.DataFrame or modin.pandas.Series or pandas.Series
        DataFrame or Series with shape (nrows, ncols) or (nrows,),
        respectively.  (The previous ``-> dict`` annotation was wrong.)

    Raises
    ------
    ValueError
        If ``ncols`` is less than 1.
    """
    cache_key = (impl, nrows, ncols)
    if cache_key in data_cache:
        return data_cache[cache_key]

    # Lazy %-style args: the message is only formatted when INFO is enabled.
    logging.info("Generating nan data %s rows and %s columns", nrows, ncols)

    if ncols > 1:
        columns = [f"col{x}" for x in range(ncols)]
        data = IMPL[impl].DataFrame(
            np.nan, index=pd.RangeIndex(nrows), columns=columns
        )
    elif ncols == 1:
        data = IMPL[impl].Series(np.nan, index=pd.RangeIndex(nrows))
    else:
        # `assert` is stripped under `python -O`; validate explicitly.
        raise ValueError("Number of columns (ncols) should be >= 1")
    data_cache[cache_key] = data
    return data
def setup(self, value_type, shape, limit):
    """Build a NaN Series plus the value/limit kwargs used by Series.fillna timings."""
    pd = IMPL[ASV_USE_IMPL]
    self.series = gen_nan_data(ASV_USE_IMPL, *shape)
    nrows = shape[0]

    if value_type == "scalar":
        fill = 18.19
    elif value_type == "dict":
        # One distinct fill value per row position, keyed by the row index.
        fill = {idx: idx * 1.23 for idx in range(nrows)}
    elif value_type == "Series":
        fill = pd.Series(
            [idx * 1.23 for idx in range(nrows)], index=pd.RangeIndex(nrows)
        )
    else:
        assert False

    self.value = fill
    self.kw = {"value": fill, "limit": int(limit * nrows) if limit else None}