class BaseProvider(object):
    """Base class for all providers.

    Holds the random generator used by subclasses and offers a helper for
    validating enum arguments.
    """

    def __init__(self, seed: Seed = None) -> None:
        """Initialize attributes.

        :param seed: Seed for random.
            When set to `None` the shared module-level ``random`` object is
            used as is and no reseeding is performed.
        """
        self.seed = seed
        self.random = random

        if seed is not None:
            self.reseed(seed)

    def reseed(self, seed: Seed = None) -> None:
        """Reseed the internal random generator.

        While the provider still points at the shared module-level ``random``
        object, a per-instance ``Random()`` is created first, so that two
        providers given the same seed return the same values independently
        of each other.

        :param seed: Seed for random.
            When set to `None` the current system time is used.
        """
        # Never seed the shared object: switch to a private generator first.
        if self.random is random:
            self.random = Random()

        self.seed = seed
        self.random.seed(self.seed)

    def _validate_enum(self, item: Any, enum: Any) -> Any:
        """Validate enum parameter of method in subclasses of BaseProvider.

        :param item: Item of enum object, or `None` to pick a random one.
        :param enum: Enum object.
        :return: Value of item.
        :raises NonEnumerableError: if ``item`` is not a member of ``enum``.
        """
        if item is None:
            result = get_random_item(enum, self.random)
        elif isinstance(item, enum):
            # Membership is decided by type only. A truthiness test here
            # would wrongly reject falsy members, e.g. an IntEnum member
            # whose value is 0.
            result = item
        else:
            raise NonEnumerableError(enum)

        return result.value

    def __str__(self) -> str:
        """Return the class name as the human-readable representation."""
        return self.__class__.__name__
class DataGenerator:
    """Generate fake purchases and the mobile-app clickstream around them.

    Every random source held by the instance is seeded with ``0`` in
    ``__init__``.
    """

    # Timestamp layout shared by generated purchases and clickstream events.
    _TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, root_output_path):
        """Create the random sources and seed each of them with ``0``.

        :param root_output_path: Stored on the instance as given; it is not
            read by any of the methods defined here.
        """
        self.root_output_path = root_output_path
        self.random = random
        self.fake = Faker()
        self.random_mimesis = Random()
        # NOTE(review): if ``random`` is the stdlib module, this seeds the
        # process-wide generator rather than a private one - confirm intent.
        self.random.seed(0)
        self.random_mimesis.seed(0)
        self._ = Field('en', seed=0)
        Faker.seed(0)

    def generate_purchases(self, size=10000):
        """Generate fake purchase records.

        Each record has the keys ``purchaseId``, ``purchaseTime``,
        ``billingCost`` and ``isConfirmed``. Purchase times fall in the 100
        days preceding 2021-01-01.

        :param size: Number of iterations passed to ``Schema.create``.
        :return: Whatever ``Schema.create`` returns for ``size`` iterations.
        """
        fake = self.fake
        field = self._
        timestamp_format = self._TIMESTAMP_FORMAT
        end_date = datetime.fromisoformat('2021-01-01')
        start_date = end_date - timedelta(days=100)

        def purchase():
            purchase_time = fake.date_time_between(start_date=start_date,
                                                   end_date=end_date)
            return {
                'purchaseId': fake.uuid4(),
                'purchaseTime': purchase_time.strftime(timestamp_format),
                # Drops the first character of the generated price string
                # (presumably a currency symbol - TODO confirm).
                'billingCost': field('price')[1:],
                'isConfirmed': field('boolean'),
            }

        return Schema(schema=purchase).create(iterations=size)

    def generate_mobile_app_clickstream(self, purchases_list: list) -> list:
        """Generate one five-event app session for every purchase.

        For each purchase a fresh user id is drawn and the events
        ``app_open``, ``search_product``, ``view_product_details``,
        ``purchase`` and ``app_close`` are emitted in that order. The
        ``purchase`` event reuses the purchase timestamp; the other
        timestamps are random offsets around it.

        :param purchases_list: Dicts with at least ``purchaseId`` and
            ``purchaseTime`` (formatted as ``%Y-%m-%d %H:%M:%S``).
        :return: Flat list of event dicts with the keys ``userId``,
            ``eventId``, ``eventType``, ``eventTime`` and, for ``app_open``
            and ``purchase`` events only, ``attributes``.
        """
        rng = self.random
        fake = self.fake
        timestamp_format = self._TIMESTAMP_FORMAT

        # TODO: also generate sessions that do not end in a purchase.
        event_types = (
            'app_open',
            'search_product',
            'view_product_details',
            'purchase',
            'app_close',
        )
        channels = [
            'Google Ads',
            'Yandex Ads',
            'Facebook Ads',
            'Twitter Ads',
            'VK Ads',
        ]
        campaign_ids = self.random_mimesis.randints(5, 100, 999)

        events = []
        for purchase in purchases_list:
            purchase_time = datetime.strptime(purchase['purchaseTime'],
                                              timestamp_format)
            user_id = fake.uuid4()

            # The order of the random draws below is significant: it fixes
            # the output produced for a given seed.
            # NOTE(review): the search/view offsets can add up to more than
            # the app_open lead (e.g. 10:01 before, then +8:59 and +3:59),
            # so view_product_details may land after the purchase time.
            opened_at = purchase_time - timedelta(
                minutes=rng.randint(10, 25), seconds=rng.randint(1, 59))
            searched_at = opened_at + timedelta(
                minutes=rng.randint(5, 8), seconds=rng.randint(1, 59))
            viewed_at = searched_at + timedelta(
                minutes=rng.randint(1, 3), seconds=rng.randint(1, 59))
            closed_at = purchase_time + timedelta(minutes=rng.randint(1, 5))

            event_times = {
                'app_open': opened_at,
                'search_product': searched_at,
                'view_product_details': viewed_at,
                'purchase': purchase_time,
                'app_close': closed_at,
            }

            for event_type in event_types:
                event = {
                    'userId': user_id,
                    'eventId': fake.uuid4(),
                    'eventType': event_type,
                    'eventTime':
                        event_times[event_type].strftime(timestamp_format),
                }
                if event_type == 'app_open':
                    event['attributes'] = {
                        'campaign_id': rng.choice(campaign_ids),
                        'channel_id': rng.choice(channels),
                    }
                elif event_type == 'purchase':
                    event['attributes'] = {
                        'purchase_id': purchase['purchaseId'],
                    }
                events.append(event)

        return events