Example #1
0
def fake_faker():
    """Return a Faker with single-valued ``PERSON`` and ``LOCATION`` providers.

    Each provider is a DynamicProvider built from a one-element list
    (``"James"`` and ``"Africa"`` respectively).
    """
    faker = Faker()

    fixed_values = (
        ("PERSON", ["James"]),
        ("LOCATION", ["Africa"]),
    )
    # Build every provider first, then register them in declaration order.
    providers = [DynamicProvider(name, values) for name, values in fixed_values]
    for provider in providers:
        faker.add_provider(provider)

    return faker
Example #2
0
    def __init__(self, records: Optional[List[Dict]] = None):
        """Store the input records and create the provider that samples them.

        :param records: Optional list of dictionaries to sample from.
        :raises TypeError: If any element of ``records`` is not a Dict.
        """
        super().__init__()
        self.records = records

        # Nothing to validate when records is None or empty.
        for candidate in self.records or ():
            if isinstance(candidate, Dict):
                continue
            raise TypeError("Each element should be of type Dict")

        # Internal, unnamed provider used to draw from the input elements
        self.dynamic_record_provider = DynamicProvider(
            provider_name="",
            elements=records,
            generator=self,
        )
Example #3
0
    def test_dynamic_add_element(self):
        """Elements added after registration can be sampled through the Faker."""
        provider = DynamicProvider(provider_name="my_provider", elements=[])

        faker = Faker()
        faker.add_provider(provider)

        # The provider starts empty; it is populated only after registration.
        for element in ("one", "two"):
            provider.add_element(element)

        sampled = faker.my_provider()
        assert sampled in ("one", "two")
Example #4
0
    def test_dynamic_with_special_provider_name(self):
        """Constructing a provider whose name starts with ``__`` raises ValueError."""
        expected_error = "Provider name cannot start with __ as it would be ignored by Faker"
        professions = ["dr.", "doctor", "nurse", "surgeon", "clerk"]

        # The error is expected from the constructor itself.
        with pytest.raises(ValueError, match=expected_error):
            DynamicProvider(provider_name="__special__", elements=professions)
Example #5
0
    def test_with_dynamic(self):
        """A registered DynamicProvider yields a value from its element list."""
        faker = Faker()
        professions = ["dr.", "doctor", "nurse", "surgeon", "clerk"]

        faker.add_provider(
            DynamicProvider(
                provider_name="medical_profession",
                elements=professions,
            )
        )

        sampled = faker.medical_profession()
        assert sampled in professions
Example #6
0
    def test_dynamic_with_empty_elements(self):
        """Sampling from a provider that has no elements raises ValueError."""
        expected_error = "Elements should be a list of values the provider samples from"

        provider = DynamicProvider(provider_name="my_provider", elements=[])
        faker = Faker()
        faker.add_provider(provider)

        # Registration succeeds; the error is expected only when sampling.
        with pytest.raises(ValueError, match=expected_error):
            faker.my_provider()
# Print one sample from each of these formatters, looked up and called in order.
for formatter_name in ("zipcode", "locale", "license_plate", "phone_number"):
    print(getattr(f, formatter_name)())

# Ten draws through the ``unique`` proxy over the inclusive range 1..10.
for _ in range(10):
    unique_int = f.unique.random_int(min=1, max=10)
    print(unique_int)

# Five rounds of template-based samples (bothify first, then hexify).
for _ in range(5):
    bothified = f.bothify(text="????-########-??", letters="ABCDEFG")
    print(bothified)
    hexified = f.hexify(text="MAC: ^^:^^:^^:^^:^^:^^", upper=True)
    print(hexified)


class NeuralProvider(BaseProvider):
    """Custom provider exposing video-related formatters."""

    # Leading underscore keeps this constant from being treated as a formatter.
    _VIDEO_CATEGORIES = ("Machine Learning", "Vim", "Linux", "Finance")

    def video_category(self):
        """Return one of the predefined video categories, chosen at random.

        The draw goes through the provider's ``random_element`` helper instead
        of the global ``random`` module, so it uses the random source of the
        generator this provider is attached to and follows that generator's
        seeding.
        """
        return self.random_element(self._VIDEO_CATEGORIES)

    # Original (misspelled) name, kept as an alias so existing callers still work.
    video_categoty = video_category

    def video_title(self):
        """Return the fixed placeholder title ``"TITLE"``."""
        return "TITLE"


# Register the custom provider class, then print one value from each of its
# formatters (category first, title second, one per line).
f.add_provider(NeuralProvider)
print(f.video_categoty(), f.video_title(), sep="\n")

# Provider that samples from a fixed list of programming languages.
programming_language_provider = DynamicProvider(
    "programming_language",
    ["Python", "Go", "JS", "Ruby", "C#"],
)
f.add_provider(programming_language_provider)

print(f.programming_language())
Example #8
0
class RecordGenerator(SpanGenerator):
    """Prioritizes the sampling of values found in input records,
    in order to allow semantically similar elements to be drawn together.

    For example, for the input template
    "My first name is {{name}} and my email is {{email}}",
    assuming the user added a list of records containing name and email
    (e.g. `[{"name": "Stephane Smith", "email": "stephane.smith@example.com"}]`),
    this generator will sample the name and email from the same record
    and not independently.
    In case the template contains entities not found in the record
    (or no records were provided at all),
    the generator will use regular Faker providers.
    In case the template contains the same type multiple times,
    the first would be taken from the record and the next ones
    from regular Faker providers.

    :example:
    >>> from faker import Faker
    >>> from presidio_evaluator.data_generator.faker_extensions import RecordGenerator

    >>> records = [
    ...     {"name": "Alan", "email": "alan@example.com"},
    ...     {"name": "Barry", "email": "barry@example.com"},
    ...     {"name": "Cynthia", "email": "cynthia@example.com"},
    ... ]

    >>> generator = RecordGenerator(records=records)
    >>> faker = Faker(generator=generator)

    >>> # Sample fake values from the same record
    >>> faker.parse("I'm {{name}} and my email is {{email}}.")

    I'm Alan and my email is alan@example.com.

    >>> # Using more than one type will use regular Faker providers:
    >>> faker.parse("{{name}}, {{name}} and {{name}} will email {{email}}.")

    Cynthia, Manuel Gonzales and Jillian Riley will email cynthia@example.com.

    >>> # Sample from a Pandas DataFrame
    >>> import pandas as pd
    >>> df = pd.DataFrame({"name":["a","b","c"],"email":["a@a","b@b","c@c"]}) # or read from file
    >>> records = df.to_dict(orient="records")
    >>> generator = RecordGenerator(records=records)
    >>> faker = Faker(generator=generator)
    >>> faker.parse("I'm {{name}} and my email is {{email}}")

    I'm a and my email is a@a

    >>> # Return spans of fake values
    >>> res = faker.parse("I'm {{name}} and my email is {{email}}",add_spans=True)

    {"fake": "I'm c and my email is c@c",
     "spans": "[{\"value\": \"c@c\", \"start\": 22, \"end\": 25, \"type\": \"email\"},
     {\"value\": \"c\", \"start\": 4, \"end\": 5, \"type\": \"name\"}]"
     }

    """

    def __init__(self, records: Optional[List[Dict]] = None):
        """Store the input records and create the provider that samples them.

        :param records: Optional list of dictionaries to sample from.
        :raises TypeError: If any element of ``records`` is not a Dict.
        """
        super().__init__()
        self.records = records

        if self.records:
            for record in self.records:
                if not isinstance(record, Dict):
                    raise TypeError("Each element should be of type Dict")

        # Use an internal provider to sample from the input elements
        self.dynamic_record_provider = DynamicProvider(provider_name="",
                                                       elements=records,
                                                       generator=self)

    def _get_random_record(self) -> Dict:
        """Return a copy of one randomly sampled record.

        When there is nothing to sample from, an empty dict is returned so
        that every formatter falls back to the regular Faker providers
        instead of failing the whole parse.
        """
        try:
            sampled = self.dynamic_record_provider.get_random_value()
        except ValueError:
            # The provider raises ValueError when it has no elements,
            # e.g. when the generator was created with records=None.
            return {}
        # Copy, because the caller deletes keys from the record while parsing.
        return sampled.copy()

    def _match_to_span(self, text: str, **kwargs) -> List[FakerSpan]:
        """Adds logic for sampling from input records if possible.

        :param text: Template containing ``{{formatter}}`` tokens.
        :return: One FakerSpan per token, in order of appearance.
        """
        matches = _re_token.finditer(text)
        # Sample one record (Dict containing fake values)
        record = self._get_random_record()

        results: List[FakerSpan] = []
        for match in matches:
            # Drop the surrounding "{{" and "}}" from the matched token.
            formatter = match.group()[2:-2]
            stripped = formatter.strip()

            value = str(self.format(formatter=stripped, record=record))
            if stripped in record:
                del record[stripped]  # Remove in order not to sample twice

            results.append(
                FakerSpan(
                    type=formatter,
                    start=match.start(),
                    end=match.end(),
                    value=value,
                ))

        return results

    def format(self, formatter: str, *args: Any, **kwargs: Any) -> str:
        """Fill in fake data. If the input record has the requested entity, return its value.

        :param formatter: Name of the formatter / entity to generate.
        :param args: Positional arguments forwarded to the parent ``format``.
        :param kwargs: May contain ``record``, the dict to look the formatter
            up in. Every other keyword argument is forwarded to the parent
            ``format``.
        :return: The record's value when it holds a truthy value for
            ``formatter``, otherwise the parent generator's output.
        """
        # "record" is consumed here and must not reach the underlying formatter.
        record = kwargs.pop("record", None)
        if not record or not record.get(formatter):
            # type not in record, go to default faker
            return super().format(formatter, *args, **kwargs)

        return record[formatter]