コード例 #1
0
def test_generator_generate_invalid_domain(domain):
    """
    test generator class generate method with invalid domain value

    builds a "specific" generator and expects `generate` to raise
    `ValueError` when it is called with *domain*.

    *domain*: domain value under test (supplied from outside this
              function; presumably a parametrized invalid value)
    """
    specific_generator = Generator(data_type="specific")
    generate_kwargs = {"num_rows": 10, "domain": domain}
    with pytest.raises(ValueError):
        specific_generator.generate(**generate_kwargs)
コード例 #2
0
def test_generator_random_num_columns(num_columns):
    """
    test generator class generate method num_columns parameter

    generates 10 rows with a "random" generator and checks that the
    second dimension of the result's shape equals *num_columns*.

    *num_columns*: requested number of columns (supplied from outside
                   this function)
    """
    random_generator = Generator(data_type="random")
    frame = random_generator.generate(num_rows=10, num_columns=num_columns)
    _, column_count = frame.shape
    assert column_count == num_columns
コード例 #3
0
def test_generator_num_rows(num_rows, domain):
    """
    test generator class generate method num_rows parameter

    generates data for *domain* with a "specific" generator and checks
    that the first dimension of the result's shape equals *num_rows*.

    *num_rows*: requested number of rows (supplied from outside this
                function)

    *domain*: domain passed through to `generate`
    """
    specific_generator = Generator(data_type="specific")
    frame = specific_generator.generate(num_rows=num_rows, domain=domain)
    row_count = frame.shape[0]
    assert row_count == num_rows
コード例 #4
0
def test_generator_generate_random_invalid_params():
    """
    test generator class generate method with invalid parameter values

    calls `generate` on a "random" generator with both `metadata` and
    `num_columns` set and expects `ValueError`.
    NOTE(review): presumably the two arguments are rejected as a
    combination -- confirm against `Generator.generate`.
    """
    random_generator = Generator(data_type="random")
    conflicting_kwargs = {
        "num_rows": 10,
        "metadata": {"mock": int},
        "num_columns": 5,
    }
    with pytest.raises(ValueError):
        random_generator.generate(**conflicting_kwargs)
コード例 #5
0
    def store(  # pylint: disable=too-many-arguments,too-many-branches
        self,
        data_dir: Optional[str] = None,
        file_name: Optional[str] = None,
        extension: Optional[str] = None,
        num_rows: Optional[int] = 500,
        **kwargs,
    ) -> str:
        """
        generate data with `Generator` and write it to a file. handles the
        `csv`, `json`, `parquet` and `xlsx` extensions

        ***
        parameters
        ***

        *data_dir* : directory to store generated file in
                        (if None will use `self.default_dir`)

        *file_name*: name of file to generate, without the extension
                        (if None will use `self.default_fn`)

        *extension*: format / extension of file, must be in
                        `SUPPORTED_EXTENSIONS`
                        (if None one is chosen at random from it)

        *num_rows*: passed to `Generator.generate` as `num_rows`

        *kwargs*: passed unchanged to `Generator.generate`

        ***
        returns
        ***

        path of the written file, as a string

        ***
        raises
        ***

        `ValueError` when *extension* is not in `SUPPORTED_EXTENSIONS`, or
        is in it but has no writer in this method
        ***
        """
        # fall back to the instance defaults for anything not supplied
        file_name = self.default_fn if file_name is None else file_name
        data_dir = self.default_dir if data_dir is None else data_dir
        if isinstance(data_dir, str):
            data_dir = pathlib.Path(data_dir)

        # resolve and validate the file format / extension
        if extension is None:
            extension = random.choice(SUPPORTED_EXTENSIONS)
        if extension not in SUPPORTED_EXTENSIONS:
            raise ValueError(
                f"invalid extension: {str(extension)}. mok supports: {str(SUPPORTED_EXTENSIONS)}",  # pylint: disable=line-too-long
            )

        target = str(data_dir.joinpath(f"{file_name}.{extension}"))

        # generate the data before touching the file system
        data = Generator(data_type=self.data_type).generate(
            num_rows=num_rows,
            **kwargs,
        )
        logger.info("storing to: %s", target)

        # extension -> name of the writer method on the generated data
        writer_names = {
            "csv": "to_csv",
            "json": "to_json",
            "parquet": "to_parquet",
            "xlsx": "to_excel",
        }
        if extension not in writer_names:
            # guards against an extension listed in SUPPORTED_EXTENSIONS
            # that has no writer here
            raise ValueError(
                f"unsupported extension: {str(extension)}. mok batch supports: {str(SUPPORTED_EXTENSIONS)}",  # pylint: disable=line-too-long
            )
        getattr(data, writer_names[extension])(target)
        return target
コード例 #6
0
def test_generator_generate_data_type(data_type):
    """
    test generator class generate method with an overridden data_type

    creates a valid "random" generator, replaces its `data_type`
    attribute with *data_type* after construction, and expects
    `generate` to raise `ValueError`.

    *data_type*: value assigned to `generator.data_type` (supplied from
                 outside this function; presumably an invalid type)
    """
    tampered_generator = Generator(data_type="random")
    # assign after construction, so the value is set without calling __init__ again
    tampered_generator.data_type = data_type
    generate_kwargs = {"num_rows": 10, "num_columns": 5}
    with pytest.raises(ValueError):
        tampered_generator.generate(**generate_kwargs)
コード例 #7
0
def test_generator_missing(num_rows, missing):
    """
    test generator class generate method missing parameter

    generates 5 columns twice with a "random" generator: once without
    `missing`, expecting no NA cells, and once with `missing=missing`,
    expecting at least one NA cell. both results must have *num_rows* rows.

    *num_rows*: requested number of rows (supplied from outside this
                function)

    *missing*: value passed as the `missing` argument of `generate`
    """
    random_generator = Generator(data_type="random")

    # without `missing`: the row count matches and no cell is NA
    complete = random_generator.generate(num_rows=num_rows, num_columns=5)
    assert complete.shape[0] == num_rows
    na_cells = complete.isna().sum().sum()
    assert na_cells == 0

    # with `missing`: the row count matches and some cell is NA
    sparse = random_generator.generate(
        num_rows=num_rows,
        num_columns=5,
        missing=missing,
    )
    assert sparse.shape[0] == num_rows
    na_cells = sparse.isna().sum().sum()
    assert na_cells != 0
コード例 #8
0
def test_generator_random_between_range(num_range, col_type):
    """
    test generator class generate method min_val & max_val parameters

    generates one column of type *col_type* with a "random" generator and
    checks, via `Series.between`, that every value of every generated
    column lies between `num_range[0]` and `num_range[1]`.

    *num_range*: indexable whose items 0 and 1 are passed as `min_val`
                 and `max_val`

    *col_type*: single entry of the `col_types` list passed to `generate`
    """
    generator = Generator(data_type="random")
    low, high = num_range[0], num_range[1]
    data = generator.generate(
        num_rows=10,
        num_columns=1,
        min_val=low,
        max_val=high,
        col_types=[col_type],
    )
    # iterate the names directly rather than enumerating and indexing back
    for column_name in generator.column_names:
        in_between = all(data[column_name].between(low, high))
        assert in_between
コード例 #9
0
def test_generator_random_length(str_len):
    """
    test generator class generate method min_len & max_len parameters

    generates one `str` column with a "random" generator and checks, via
    `Series.str.len().between`, that the length of every value of every
    generated column lies between `str_len[0]` and `str_len[1]`.

    *str_len*: indexable whose items 0 and 1 are passed as `min_len`
               and `max_len`
    """
    generator = Generator(data_type="random")
    shortest, longest = str_len[0], str_len[1]
    data = generator.generate(
        num_rows=10,
        num_columns=1,
        min_len=shortest,
        max_len=longest,
        col_types=[str],
    )
    # iterate the names directly rather than enumerating and indexing back
    for column_name in generator.column_names:
        lengths = data[column_name].str.len()
        in_between = all(lengths.between(shortest, longest))
        assert in_between
コード例 #10
0
def test_generator_metadata(metadata):
    """
    test generator class generate method metadata parameter

    generates 10 rows from *metadata* with a "random" generator and checks
    that the exact type of every value in every generated column is either
    one of the `metadata` values or the `NUMPY_TYPES` entry for one of them.

    *metadata*: mapping passed to `generate`; its values are used as the
                expected types (presumably column name -> python type,
                confirm against `Generator.generate`)
    """
    generator = Generator(data_type="random")
    data = generator.generate(
        num_rows=10,
        metadata=metadata,
    )
    numpy_types = [
        NUMPY_TYPES[col_type] for col_type in metadata.values()
        if col_type in NUMPY_TYPES
    ]
    # loop-invariant: build the accepted types once, not per cell
    accepted_types = [*metadata.values(), *numpy_types]
    # iterate the names directly rather than enumerating and indexing back
    for column_name in generator.column_names:
        correct_type = all(data[column_name].apply(
            lambda value: type(value) in accepted_types))
        assert correct_type
コード例 #11
0
def test_generator_init_invalid(data_type):
    """
    test generator class init method with invalid parameter values

    expects constructing `Generator` with *data_type* to raise `ValueError`.

    *data_type*: value passed as `data_type` (supplied from outside this
                 function; presumably a parametrized invalid value)
    """
    init_kwargs = {"data_type": data_type}
    with pytest.raises(ValueError):
        Generator(**init_kwargs)
コード例 #12
0
def test_generator_init(data_type):
    """
    test generator class init method with valid parameters

    constructs `Generator` with *data_type* and checks that the result is
    a `Generator` instance.

    *data_type*: value passed as `data_type` (supplied from outside this
                 function)
    """
    instance = Generator(data_type=data_type)
    is_generator = isinstance(instance, Generator)
    assert is_generator