예제 #1
0
def gen_unique_values(data_length, dtype='int', seed=None):
    """
    Build an array of data_length values for the requested dtype.

    Candidate values come from np.arange for numeric dtypes and from
    gen_strlist for 'str'; they are then handed to gen_arr_from_input
    with repeat=False.

    data_length: result length of array of unique values,
    dtype: dtype of generated array, one of 'float', 'int' or 'str',
    seed: seed to initialize random state (forwarded to gen_arr_from_input)

    Raises ValueError if dtype is not one of the supported names.
    """

    if dtype in ('float', 'int'):
        values = np.arange(data_length, dtype=dtype)
    elif dtype == 'str':
        values = gen_strlist(data_length)
    else:
        # Previously this fell through with `values` unassigned and failed
        # later with an UnboundLocalError; fail early with a clear message.
        raise ValueError(
            "unsupported dtype {!r}: expected 'float', 'int' or 'str'".format(dtype))

    return gen_arr_from_input(data_length, values, repeat=False, seed=seed)
예제 #2
0
def gen_arr_of_dtype(data_length,
                     dtype='float',
                     random=True,
                     limits=None,
                     nunique=1000,
                     input_data=None,
                     seed=None):
    """
    Generate an array of data_length elements of the requested kind.

    data_length: result array length,
    dtype: kind of data to generate: 'float', 'int', 'str' or 'bool',
    random: forwarded to gen_arr_from_input; only used when input_data is given,
    limits: a tuple of (min, max) passed as low/high to np.random.randint for
        'int' data; when falsy, the bounds reported by np.iinfo(dtype) are used,
    nunique: size passed to gen_strlist to build the pool that 'str' values
        are sampled from,
    input_data: 1D sequence of values used for generation of array data;
        takes precedence over dtype when provided,
    seed: seed to initialize random state (seeds numpy's global generator)

    Returns the generated array, or None when dtype is not recognised.
    """

    # Seeding is optional so callers can rely on an already-seeded global state.
    if seed is not None:
        np.random.seed(seed)

    # Explicit input data wins over dtype-driven generation.
    # NOTE(review): argument order (input_data, data_length) should be checked
    # against the gen_arr_from_input signature, which is not visible here.
    if input_data is not None:
        return gen_arr_from_input(input_data, data_length, random=random)

    if dtype == 'float':
        generated = np.random.random_sample(data_length)
    elif dtype == 'int':
        int_info = np.iinfo(dtype)
        fallback_bounds = (int_info.min, int_info.max)
        low, high = limits or fallback_bounds
        generated = np.random.randint(low, high, data_length)
    elif dtype == 'str':
        string_pool = gen_strlist(nunique)
        generated = np.random.choice(string_pool, data_length)
    elif dtype == 'bool':
        generated = np.random.choice([True, False], data_length)
    else:
        # Unknown dtype: signal "nothing generated" rather than raising.
        generated = None

    return generated
예제 #3
0
 def test_df_groupby_mean_by_str_sort_false(self):
     # Runs the shared groupby helper for 'mean' with string keys produced by
     # gen_strlist and the 'sort' groupby parameter set to 'False'.
     # NOTE(review): 'sort' is passed as the string 'False', not the boolean;
     # presumably the helper renders it into generated code - confirm against
     # _test_df_groupby_method.
     helper_kwargs = {
         'usecase_name': 'by_str_mean',
         'input_data': [gen_strlist(n_groups_default, 3, 'abcdef')],
         'groupby_params': {'sort': 'False'},
     }
     self._test_df_groupby_method('mean', **helper_kwargs)