gender_prob = config["genders"]["percentages"]

# data processing config
amounts_cpu = config["data_processing"]["amount_in_cpu"]
# The setting is compared against the string "True"; the comparison already
# yields a bool, so no conditional expression is needed.
auto_batch = config["data_processing"]["auto_batch_based_in_cpu"] == "True"

# mock-data generators from mimesis, built for the configured language
business = mimesis.Business(language)
person = mimesis.Person(language)
address = mimesis.Address(language)
dates = mimesis.Datetime(language)

# Calculating random probabilities
# 245 countries
country_prob = common_functions.random_probabilities(1, 245)
ages_probab = common_functions.random_probabilities(min_age, max_age)

# Global customers array
customers = []


def generate_customers(amount, index_start):
    # generates customers' info
    # amount: number of customers to generate
    # index_start: index from which the customer ids start
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    results = set()
    for i in range(amount):
        customer_id = index_start + i
        # NOTE(review): the excerpt is cut off in the middle of the next
        # statement, which begins:
        #   customer_name = person.full_name().replace(
# campaign config
campaigns = config[language]["campaigns"]
n_campaigns = len(campaigns)
# Campaigns include Id = 0 --> No Campaign
campaigns_id_range = list(range(n_campaigns + 1))

# media config
media_sources = config["media"]["sources"]
media_prob = config["media"]["percentages"]

# payments config
payments = config[language]["payments"]
n_payments = len(payments)
# Sized from n_payments (the original used n_campaigns here, which looks like
# a copy-paste slip): the id range now covers 1..n_payments, the same bounds
# that payments_prob is built from below.
payments_id_range = list(range(1, n_payments + 1))

# calculating random probabilities
payments_prob = common_functions.random_probabilities(1, n_payments)
# probabilities of campaigns
campaigns_prob = common_functions.random_prob_sum(
    0, n_campaigns, [0], percentage_no_campaign)
# delivery days of an order (between 1 and 5 days)
delivery_days_prob = common_functions.random_probabilities(1, 5)

date_format = '%Y-%m-%d %H:%M:%S'

# Global orders array
orders = []


def generate_orders(amount, index_start):
    # generate orders
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    # saves intermediary results
    results = set()
    index = index_start
outsize = config["shippers"]["total"]
language = config["language"]

# data processing config
amounts_cpu = config["data_processing"]["amount_in_cpu"]
# The setting is compared against the string "True"; the comparison already
# yields a bool, so no conditional expression is needed.
auto_batch = config["data_processing"]["auto_batch_based_in_cpu"] == "True"

# mock-data generators from mimesis, built for the configured language
business = mimesis.Business(language)
address = mimesis.Address(language)
dates = mimesis.Datetime(language)
person = mimesis.Person(language)

# 245 countries
country_prob = common_functions.random_probabilities(1, 245)

shippers = []


def generate_shippers(amount, index_start):
    # generates shippers' info
    # amount: number of shippers to generate
    # index_start: index from which the shipper ids start
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    results = set()
    for i in range(amount):
        shipper_id = index_start + i
        phone_number = person.telephone()
        # NOTE(review): the excerpt is cut off in the middle of the next
        # statement, which begins:
        #   responsible_name = person.full_name().replace(
# orders config
orders_file = config["orders"]["outfile"]
n_orders = config["orders"]["total"]

# products config
n_products = config["products"]["total"]

# orders info, read back from the CSV named in the orders config
orders = pd.read_csv(out_path + orders_file)

# clickstream succeed
clickstream_outfile = config["clickstream_succeed"]["outfile"]

# random probabilities
num_products_prob = common_functions.random_probabilities(
    1, number_max_prod_per_order)
quantities = list(range(1, number_max_prod_per_order + 1))

# Global orders items and clickstream succeed arrays
orders_items = []
clickstream_succeed = []
ids = set()


def generate_orders_items(amount, index_start):
    # generates orders items and succeed clickstream
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    orders_items_partial = []
    clickstream_succeed_partial = []
    for i in range(amount):
        # index_start is shifted down by one here
        # (presumably 1-based ids mapped onto 0-based rows -- TODO confirm)
        index = index_start + i - 1
n_suppliers = config["suppliers"]["total"]
supplier_id_range = list(range(1, n_suppliers + 1))

# data processing config
amounts_cpu = config["data_processing"]["amount_in_cpu"]
# The setting is compared against the string "True"; the comparison already
# yields a bool, so no conditional expression is needed.
auto_batch = config["data_processing"]["auto_batch_based_in_cpu"] == "True"

# mock-data generators from mimesis, built for the configured language
food = mimesis.Food(language)
person = mimesis.Person(language)
dates = mimesis.Datetime(language)
code = mimesis.Code(language)

# 5 categories
categories_prob = common_functions.random_probabilities(1, 5)
suppliers_prob = common_functions.random_probabilities(1, n_suppliers)

products = []


def generate_products(amount, index_start):
    # generates products' info
    # amount: number of products to generate
    # index_start: index from which the product ids start
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    results = set()
    for i in range(amount):
        product_id = index_start + i
        # category ids 1..5 are drawn with the weights in categories_prob
        category_id = np.random.choice([1, 2, 3, 4, 5], p=categories_prob)
amounts_cpu = config["data_processing"]["amount_in_cpu"]
# The setting is compared against the string "True"; the comparison already
# yields a bool, so no conditional expression is needed.
auto_batch = config["data_processing"]["auto_batch_based_in_cpu"] == "True"

# config of campaigns
# NOTE(review): "n_campaings" is misspelled; the name is kept as-is because it
# may be referenced outside this excerpt.
n_campaings = len(config[language]["campaigns"])
campaigns_id_range = list(range(1, n_campaings + 1))

# config of customers
n_customers = config["customers"]["total"]

# config of products
n_products = config["products"]["total"]

# calculating random probabilities
campaigns_prob = common_functions.random_probabilities(1, n_campaings)

# Global clickstream array
clickstream = []


def generate_clickstream(amount):
    # Generates clicks that did not succeed
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; the remainder of the body is outside the reviewed chunk.
    results = []
    for _ in range(amount):
        customer_id = random.randint(1, n_customers)
        order_date = common_functions.random_date(
            click_start_date, click_end_date, random.random())