Ejemplo n.º 1
0
class BaseProvider:
    """Base class for all providers.

    Every provider exposes a ``random`` attribute for subclasses to draw
    from. An unseeded provider uses the module-level ``random`` object;
    seeding a provider gives it its own ``Random`` instance.
    """

    def __init__(self, seed: Seed = None) -> None:
        """Initialize attributes.

        :param seed: Seed for random.
            When ``None`` the provider keeps the module-level ``random``
            object and does not seed it.
        """
        self.seed = seed
        self.random = random

        if seed is not None:
            self.reseed(seed)

    def reseed(self, seed: Seed = None) -> None:
        """Reseed the internal random generator.

        If the provider is still using the module-level ``random`` object,
        it is first replaced by a per-instance ``Random`` so that seeding
        this provider does not touch the shared one. Two providers reseeded
        with the same seed therefore return the same values.

        :param seed: Seed for random; forwarded unchanged to
            ``self.random.seed``.
        """
        # Identity check: only the shared generator is swapped out, an
        # already private generator is reseeded in place.
        if self.random is random:
            self.random = Random()

        self.seed = seed
        self.random.seed(self.seed)

    def _validate_enum(self, item: Any, enum: Any) -> Any:
        """Validate enum parameter of method in subclasses of BaseProvider.

        :param item: Item of enum object, or ``None`` to have one picked by
            ``get_random_item(enum, self.random)``.
        :param enum: Enum object.
        :return: Value of item.
        :raises NonEnumerableError: if ``item`` is neither ``None`` nor an
            instance of ``enum``.
        """
        if item is None:
            result = get_random_item(enum, self.random)
        # No truthiness test here: a member whose value is falsy (for
        # example an ``IntEnum`` member equal to 0) is still a valid item.
        elif isinstance(item, enum):
            result = item
        else:
            raise NonEnumerableError(enum)

        return result.value

    def __str__(self) -> str:
        """Return the name of the provider's class."""
        return self.__class__.__name__
Ejemplo n.º 2
0
class DataGenerator:
    """Generate fake purchases and the mobile-app clickstream around them.

    All random sources used by the instance are seeded with 0 in
    ``__init__`` so that repeated runs draw the same sequences.
    """

    # Timestamp layout used for every time field that is produced or parsed.
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    # Events emitted, in this order, for every purchase.
    _SESSION_EVENTS = (
        'app_open',
        'search_product',
        'view_product_details',
        'purchase',
        'app_close',
    )

    # Values the ``channel_id`` attribute of an ``app_open`` event is drawn from.
    _CHANNELS = (
        'Google Ads',
        'Yandex Ads',
        'Facebook Ads',
        'Twitter Ads',
        'VK Ads',
    )

    def __init__(self, root_output_path):
        """Store the output path and set up the seeded random sources.

        :param root_output_path: Stored on the instance as
            ``root_output_path``; not used by the methods of this class.
        """
        self.root_output_path = root_output_path
        # NOTE: ``random`` is whatever the module-level name refers to; it is
        # seeded in place below, so every other user of that object is
        # affected as well.
        self.random = random
        self.fake = Faker()
        self.random_mimesis = Random()
        self.random.seed(0)
        self.random_mimesis.seed(0)
        self._ = Field('en', seed=0)
        # Seeding is done on the Faker class, not on ``self.fake``.
        Faker.seed(0)

    def generate_purchases(self, size: int = 10000):
        """Create ``size`` fake purchase records.

        Each record has the keys ``purchaseId``, ``purchaseTime`` (formatted
        as ``YYYY-MM-DD HH:MM:SS`` and drawn between 100 days before
        2021-01-01 and 2021-01-01), ``billingCost`` and ``isConfirmed``.

        :param size: Number of iterations passed to ``Schema.create``.
        :return: Whatever ``Schema.create`` returns for the record factory.
        """
        fake = self.fake
        field = self._
        time_format = self._TIME_FORMAT
        end_date = datetime.fromisoformat('2021-01-01')
        start_date = end_date - timedelta(days=100)

        def purchase_record():
            purchase_time = fake.date_time_between(start_date=start_date,
                                                   end_date=end_date)
            return {
                'purchaseId': fake.uuid4(),
                'purchaseTime': purchase_time.strftime(time_format),
                # The first character of the generated price is dropped
                # (presumably a currency symbol -- confirm).
                'billingCost': field('price')[1:],
                'isConfirmed': field('boolean'),
            }

        schema = Schema(schema=purchase_record)
        return schema.create(iterations=size)

    def generate_mobile_app_clickstream(self, purchases_list: list) -> list:
        """Build the app events of one session per purchase.

        For every purchase a new user id is generated and five events are
        emitted in the order of ``_SESSION_EVENTS``. ``app_open`` carries a
        ``campaign_id``/``channel_id`` attribute pair, ``purchase`` carries
        the ``purchase_id``; the other events have no ``attributes`` key.

        :param purchases_list: Purchase records; each must provide
            ``purchaseId`` and a ``purchaseTime`` in ``YYYY-MM-DD HH:MM:SS``
            format.
        :return: Flat list of event dicts, grouped by purchase in input order.
        """
        rng = self.random
        fake = self.fake
        time_format = self._TIME_FORMAT
        campaign_ids = self.random_mimesis.randints(5, 100, 999)
        events = []

        # TODO: also generate sessions that end without a purchase.
        for purchase in purchases_list:
            purchase_time = datetime.strptime(purchase['purchaseTime'],
                                              time_format)
            user_id = fake.uuid4()

            # The random draws below keep a fixed order so that a seeded run
            # stays reproducible.
            # NOTE(review): the offsets are drawn independently. open -> view
            # can add up to 12m58s while app_open may be only 10m01s before
            # the purchase, so 'view_product_details' can be timestamped
            # after 'purchase'.
            opened_at = purchase_time - timedelta(
                minutes=rng.randint(10, 25), seconds=rng.randint(1, 59))
            searched_at = opened_at + timedelta(
                minutes=rng.randint(5, 8), seconds=rng.randint(1, 59))
            viewed_at = searched_at + timedelta(
                minutes=rng.randint(1, 3), seconds=rng.randint(1, 59))
            closed_at = purchase_time + timedelta(minutes=rng.randint(1, 5))

            event_times = {
                'app_open': opened_at,
                'search_product': searched_at,
                'view_product_details': viewed_at,
                'purchase': purchase_time,
                'app_close': closed_at,
            }

            for event_type in self._SESSION_EVENTS:
                event = {
                    'userId': user_id,
                    'eventId': fake.uuid4(),
                    'eventType': event_type,
                    'eventTime': event_times[event_type].strftime(time_format),
                }
                if event_type == 'app_open':
                    event['attributes'] = {
                        'campaign_id': rng.choice(campaign_ids),
                        'channel_id': rng.choice(self._CHANNELS),
                    }
                elif event_type == 'purchase':
                    event['attributes'] = {
                        'purchase_id': purchase['purchaseId'],
                    }
                events.append(event)

        return events