def gen_unique_values(data_length, dtype='int', seed=None):
    """
    Build a pool of data_length distinct source values and pass it to
    gen_arr_from_input with repeat=False.

    data_length: result length of array of unique values,
    dtype: dtype of generated array, one of 'float', 'int' or 'str',
    seed: seed to initialize random state (forwarded to gen_arr_from_input)

    Raises ValueError if dtype is not one of the supported names.
    """
    if dtype in ('float', 'int'):
        # 0 .. data_length-1 are distinct by construction
        values = np.arange(data_length, dtype=dtype)
    elif dtype == 'str':
        values = gen_strlist(data_length)
    else:
        # Previously an unsupported dtype left `values` unbound and surfaced
        # as an UnboundLocalError on the call below; fail early and clearly.
        raise ValueError(
            "unsupported dtype {!r}: expected 'float', 'int' or 'str'".format(dtype))

    # NOTE(review): gen_arr_of_dtype calls gen_arr_from_input with the first two
    # positional arguments in the opposite order (input_data, data_length) -
    # confirm which order matches the helper's signature.
    return gen_arr_from_input(data_length, values, repeat=False, seed=seed)
def gen_arr_of_dtype(data_length, dtype='float', random=True, limits=None, nunique=1000, input_data=None, seed=None):
    """
    Generate a 1D array of data_length elements using numpy's global random state.

    data_length: result array length,
    dtype: kind of data to generate, one of 'float', 'int', 'str' or 'bool',
    random: forwarded to gen_arr_from_input; only used when input_data is given,
    limits: a tuple of (min, max) passed to np.random.randint for 'int' arrays,
            when omitted the bounds reported by np.iinfo(dtype) are used,
    nunique: argument passed to gen_strlist to build the pool that 'str' arrays
             are sampled from,
    input_data: 1D sequence of values used for generation of array data,
                takes precedence over dtype when provided,
    seed: seed to initialize random state (np.random.seed is called with it)

    Returns the generated array, or None when dtype is not one of the names above.
    """
    if seed is not None:
        np.random.seed(seed)

    # caller-supplied data wins over dtype-driven generation
    if input_data is not None:
        # NOTE(review): gen_unique_values passes (data_length, values) to
        # gen_arr_from_input, i.e. the opposite positional order - confirm
        # which order matches the helper's signature.
        return gen_arr_from_input(input_data, data_length, random=random)

    if dtype == 'float':
        result = np.random.ranf(data_length)
    elif dtype == 'int':
        if limits:
            low, high = limits
        else:
            int_info = np.iinfo(dtype)
            low, high = int_info.min, int_info.max
        result = np.random.randint(low, high, data_length)
    elif dtype == 'str':
        string_pool = gen_strlist(nunique)
        result = np.random.choice(string_pool, data_length)
    elif dtype == 'bool':
        result = np.random.choice([True, False], data_length)
    else:
        # unknown dtype names are not an error here
        result = None

    return result
def test_df_groupby_mean_by_str_sort_false(self):
    # Runs the shared groupby driver for 'mean' with one input column produced
    # by gen_strlist and a 'sort' entry in the groupby parameters.
    key_column = gen_strlist(n_groups_default, 3, 'abcdef')

    # NOTE(review): 'sort' is the string 'False', not the boolean False.
    # Presumably the driver formats these values into generated code; if it
    # passes them to groupby directly, a non-empty string is truthy - confirm.
    groupby_kwargs = {'sort': 'False'}

    self._test_df_groupby_method(
        'mean',
        usecase_name='by_str_mean',
        input_data=[key_column],
        groupby_params=groupby_kwargs,
    )