gender_prob = config["genders"]["percentages"]

# Data-processing settings. The auto-batch switch is stored in the config
# as the string "True", so it is converted to a real boolean here.
_processing_cfg = config["data_processing"]
amounts_cpu = _processing_cfg["amount_in_cpu"]
auto_batch = _processing_cfg["auto_batch_based_in_cpu"] == "True"

# Mimesis providers used to fabricate the mock data, all created for the
# configured `language`.
business = mimesis.Business(language)
person = mimesis.Person(language)
address = mimesis.Address(language)
dates = mimesis.Datetime(language)

# Random probability vectors: one for the 245 countries (bounds 1..245) and
# one for the ages (bounds min_age..max_age).
# NOTE(review): presumably one probability per integer in the range --
# confirm against common_functions.random_probabilities.
country_prob = common_functions.random_probabilities(1, 245)
ages_probab = common_functions.random_probabilities(min_age, max_age)

# Module-level list of customers.
customers = []


def generate_customers(amount, index_start):
    # generates customers' info
    # amount: number of customer to generate
    # index_start: from what index starts

    results = set()
    for i in range(amount):
        customer_id = index_start + i
        customer_name = person.full_name().replace(
# Campaign config.
campaigns = config[language]["campaigns"]
n_campaigns = len(campaigns)
# Campaign ids start at 0: id 0 stands for "no campaign".
campaigns_id_range = list(range(n_campaigns + 1))

# Media config.
media_sources = config["media"]["sources"]
media_prob = config["media"]["percentages"]

# Payments config.
payments = config[language]["payments"]
n_payments = len(payments)
# Payment ids run from 1 to n_payments. This range was previously built from
# n_campaigns, so it disagreed with `payments_prob` (sized from n_payments)
# whenever the number of campaigns differed from the number of payments.
payments_id_range = list(range(1, n_payments + 1))

# Random probabilities.
# NOTE(review): random_probabilities(1, k) is presumably one probability per
# id in 1..k -- confirm against common_functions.
payments_prob = common_functions.random_probabilities(1, n_payments)
# Probabilities of campaigns (bounds 0..n_campaigns); `[0]` and
# `percentage_no_campaign` are forwarded to the helper unchanged.
campaigns_prob = common_functions.random_prob_sum(
    0, n_campaigns, [0], percentage_no_campaign)
# Delivery days of an order (between 1 and 5 days).
delivery_days_prob = common_functions.random_probabilities(1, 5)

date_format = '%Y-%m-%d %H:%M:%S'

# Module-level list of orders.
orders = []

def generate_orders(amount, index_start):
    # generate orders

    # saves intermediary results
    results = set()
    index = index_start
Beispiel #3
0
outsize = config["shippers"]["total"]
language = config["language"]

# Data-processing settings. The auto-batch switch is stored in the config
# as the string "True", so it is converted to a real boolean here.
_processing_cfg = config["data_processing"]
amounts_cpu = _processing_cfg["amount_in_cpu"]
auto_batch = _processing_cfg["auto_batch_based_in_cpu"] == "True"

# Mimesis providers used to fabricate the mock data, all created for the
# configured `language`.
business = mimesis.Business(language)
address = mimesis.Address(language)
dates = mimesis.Datetime(language)
person = mimesis.Person(language)

# Random probability vector for the 245 countries (bounds 1..245).
country_prob = common_functions.random_probabilities(1, 245)

# Module-level list of shippers.
shippers = []


def generate_shippers(amount, index_start):
    # generates shippers' info
    # amount: number of shippers to generate
    # index_start: from what index starts

    results = set()

    for i in range(amount):
        shipper_id = index_start + i
        phone_number = person.telephone()
        responsible_name = person.full_name().replace(
# orders config
orders_file = config["orders"]["outfile"]
n_orders = config["orders"]["total"]

# products config
n_products =  config["products"]["total"]

# orders info
orders = pd.read_csv(out_path + orders_file)

# clickstream succeed
clickstream_outfile = config["clickstream_succeed"]["outfile"]

# random probabilities
num_products_prob = common_functions.random_probabilities(1, number_max_prod_per_order)
quantities = list(range(1,number_max_prod_per_order + 1))

# Global orders items and clickstrem succeed array
orders_items = []
clickstream_succeed = []
ids = set()

def generate_orders_items(amount, index_start):
    # generates orders items and succeed clickstream
    
    orders_items_partial = []
    clickstream_succeed_partial = []

    for i in range(amount):
        index = index_start + i - 1
n_suppliers = config["suppliers"]["total"]
# Supplier ids run from 1 to n_suppliers inclusive.
supplier_id_range = [*range(1, n_suppliers + 1)]

# Data-processing settings. The auto-batch switch is stored in the config
# as the string "True", so it is converted to a real boolean here.
_processing_cfg = config["data_processing"]
amounts_cpu = _processing_cfg["amount_in_cpu"]
auto_batch = _processing_cfg["auto_batch_based_in_cpu"] == "True"

# Mimesis providers used to fabricate the mock data, all created for the
# configured `language`.
food = mimesis.Food(language)
person = mimesis.Person(language)
dates = mimesis.Datetime(language)
code = mimesis.Code(language)

# Random probability vectors for the 5 categories (bounds 1..5) and for the
# suppliers (bounds 1..n_suppliers).
categories_prob = common_functions.random_probabilities(1, 5)
suppliers_prob = common_functions.random_probabilities(1, n_suppliers)

# Module-level list of products.
products = []


def generate_products(amount, index_start):
    # generates products' info
    # amount: number of products to generate
    # index_start: from what index starts

    results = set()

    for i in range(amount):
        product_id = index_start + i
        category_id = np.random.choice([1, 2, 3, 4, 5], p=categories_prob)
# Data-processing settings. The auto-batch switch is stored in the config
# as the string "True", so it is converted to a real boolean here.
_processing_cfg = config["data_processing"]
amounts_cpu = _processing_cfg["amount_in_cpu"]
auto_batch = _processing_cfg["auto_batch_based_in_cpu"] == "True"

# Campaigns: count the campaigns configured for `language`; their ids run
# from 1 to that count inclusive.
_campaign_entries = config[language]["campaigns"]
n_campaings = len(_campaign_entries)
campaigns_id_range = [*range(1, n_campaings + 1)]

# Total number of customers.
n_customers = config["customers"]["total"]

# Total number of products.
n_products = config["products"]["total"]

# Random probability vector for the campaigns (bounds 1..n_campaings).
campaigns_prob = common_functions.random_probabilities(1, n_campaings)

# Module-level list of clickstream rows.
clickstream = []


def generate_clickstream(amount):
    # Generates no succeed clicks

    results = []

    for _ in range(amount):
        customer_id = random.randint(1, n_customers)
        order_date = common_functions.random_date(click_start_date,
                                                  click_end_date,
                                                  random.random())