コード例 #1
0
def main():
    """Generate a customer / product / orders dummy dataset and export it.

    Builds three table specifications, hands them to
    ``StarSchemaModel.from_list``, generates every dataset with progress
    printing enabled, and writes the result to ``folder`` both as CSV and
    as a pickled pyschema.

    ``num_iterations``, ``scale_factor`` and ``folder`` are not defined in
    this function; they are read from the enclosing (module) scope.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # ---- Dimensions ----
    customer_dim = ('naive_type2_scd', {
        'name': 'customer',
        'entity_generator': generate_customer,
        'num_iterations': num_iterations,
        # Mutating columns are updated 30% of the time ...
        'mutation_rate': 0.3,
        # ... and 'address' is the only column allowed to change.
        'mutating_cols': ['address'],
    })
    product_dim = ('naive', {
        'name': 'product',
        'entity_generator': generate_product,
        'num_iterations': num_products,
    })

    # ---- Facts ----
    orders_fact = ('naive', {
        'name': 'orders',
        'entity_generator': generate_order,
        'num_iterations': num_iterations * scale_factor,
        # Each order is related to the customer and product tables.
        'relations': [
            {'name': 'customer'},
            {'name': 'product'},
        ],
    })

    table_specs = [customer_dim, product_dim, orders_fact]

    model = StarSchemaModel.from_list(table_specs)
    model.generate_all_datasets(print_progress=True)

    # Export the generated data in both supported on-disk formats.
    model.to_csv(folder)
    model.to_pickled_pyschema(folder)
    print("Done")
コード例 #2
0
def main():
    """Generate a six-table dummy dataset (3 dimensions, 3 facts) and export it.

    Dimensions: customer, product, currency.
    Facts: orders, order_item, currency_conversion.

    The specifications are passed to ``StarSchemaModel.from_list``; every
    dataset is generated with progress printing enabled and then written to
    ``folder`` as CSV and as a pickled pyschema.

    ``num_iterations``, ``scale_factor``, ``num_currencies``, ``num_days``
    and ``folder`` are not defined in this function; they are read from the
    enclosing (module) scope.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # ---- Dimensions ----
    customer_dim = ('naive', {
        'name': 'customer',                     # name of the entity/table
        'entity_generator': generate_customer,  # function that defines the entity
        'num_iterations': num_iterations,       # how many times to run that function
    })
    product_dim = ('naive', {
        'name': 'product',
        'entity_generator': generate_product,
        'num_iterations': num_products,
    })
    currency_dim = ('naive', {
        'name': 'currency',
        'entity_generator': generate_currency,
        'num_iterations': num_currencies,
    })

    # ---- Facts ----
    orders_fact = ('naive', {
        'name': 'orders',
        'entity_generator': generate_order,
        'num_iterations': num_iterations * scale_factor,
        'relations': [
            {'name': 'customer'},
            {'name': 'currency'},
        ],
    })

    # Author's notes on order_item:
    #   * Every iteration shares the same entity link for one_to_many
    #     relations (e.g. one order_id per order_item).
    #   * For many_to_many the link is sampled; when the relation is unique
    #     it is sampled without replacement.
    #   * Here an order has several order items, each linked to a distinct
    #     product within that order. If an order could contain the same
    #     product more than once, the flag would be false.
    # NOTE(review): the original notes call this flag `unique_per_fact`,
    # while the key actually set below is 'unique' - confirm they are the same.
    order_item_fact = ('naive', {
        'name': 'order_item',
        'entity_generator': generate_order_item,
        'num_iterations': num_iterations * scale_factor,
        # Number of facts per iteration (e.g. 3 items for 1 order).
        'num_entities_per_iteration': lambda: random.randint(1, 3),
        'relations': [
            {'name': 'orders', 'unique': True},
            {'name': 'product', 'type': 'many_to_many', 'unique': True},
        ],
    })

    # Author's notes on currency_conversion:
    #   * The default relation type is one_to_many; with 'unique' set there
    #     is a distinct value for each iteration, sampled from the source
    #     table.
    #   * This will fail if there are more iterations than values in the
    #     original table.
    currency_conversion_fact = ('naive', {
        'name': 'currency_conversion',
        'entity_generator': generate_currency_conv,
        'num_iterations': num_currencies,
        # One record per currency per day.
        'num_entities_per_iteration': num_days,
        'relations': [
            {'name': 'currency', 'unique': True},
        ],
    })

    table_specs = [
        customer_dim,
        product_dim,
        currency_dim,
        orders_fact,
        order_item_fact,
        currency_conversion_fact,
    ]

    model = StarSchemaModel.from_list(table_specs)
    model.generate_all_datasets(print_progress=True)

    # Export the generated data in both supported on-disk formats.
    model.to_csv(folder)
    model.to_pickled_pyschema(folder)
    print("Done")
コード例 #3
0
def main():
    """Generate a six-table dummy dataset, write CSVs, and emit dbt schemas.

    Dimensions: customer (built with the 'naive_type2_scd' generator),
    product, currency.
    Facts: orders, order_item, currency_conversion.

    After generation the data is written to ``data_path`` as CSV, and a dbt
    schema is produced through both ``PostgresSchemaAdapter`` and
    ``BigquerySchemaAdapter`` at ``schema_path``.

    ``num_iterations``, ``scale_factor``, ``num_currencies``, ``num_days``,
    ``low_date``, ``high_date``, ``data_path`` and ``schema_path`` are not
    defined in this function; they are read from the enclosing (module)
    scope.
    """
    num_products = get_num_products(num_iterations, scale_factor)

    # ---- Dimensions ----
    customer_dim = ('naive_type2_scd', {
        'name': 'customer',
        'min_valid_from': low_date,
        'max_valid_from': high_date,
        'entity_generator': generate_customer,
        'num_iterations': num_iterations,
        # Mutating columns are updated 10% of the time ...
        'mutation_rate': 0.1,
        # ... and 'address' is the only column allowed to change.
        'mutating_cols': ['address'],
    })
    product_dim = ('naive', {
        'name': 'product',
        'entity_generator': generate_product,
        'num_iterations': num_products,
    })
    currency_dim = ('naive', {
        'name': 'currency',
        'entity_generator': generate_currency,
        'num_iterations': num_currencies,
    })

    # ---- Facts ----
    orders_fact = ('naive', {
        'name': 'orders',
        'entity_generator': generate_order,
        'num_iterations': num_iterations * scale_factor,
        'relations': [
            {'name': 'customer'},
            {'name': 'currency'},
        ],
    })

    # Author's notes on order_item:
    #   * Every iteration shares the same entity link for one_to_many
    #     relations (e.g. one order_id per order_item).
    #   * For many_to_many the link is sampled; when the relation is unique
    #     it is sampled without replacement.
    #   * Here an order has several order items, each linked to a distinct
    #     product within that order. If an order could contain the same
    #     product more than once, the flag would be false.
    # NOTE(review): the original notes call this flag `unique_per_fact`,
    # while the key actually set below is 'unique' - confirm they are the same.
    order_item_fact = ('naive', {
        'name': 'order_item',
        'entity_generator': generate_order_item,
        'num_iterations': num_iterations * scale_factor,
        # Number of facts per iteration (e.g. 3 items for 1 order).
        'num_entities_per_iteration': lambda: random.randint(1, 3),
        'relations': [
            {'name': 'orders', 'unique': True},
            {'name': 'product', 'type': 'many_to_many', 'unique': True},
        ],
    })

    # Author's notes on currency_conversion:
    #   * The default relation type is one_to_many; with 'unique' set there
    #     is a distinct value for each iteration, sampled from the source
    #     table.
    #   * This will fail if there are more iterations than values in the
    #     original table.
    currency_conversion_fact = ('naive', {
        'name': 'currency_conversion',
        'entity_generator': generate_currency_conv,
        'num_iterations': num_currencies,
        # One record per currency per day.
        'num_entities_per_iteration': num_days,
        'relations': [
            {'name': 'currency', 'unique': True},
        ],
    })

    table_specs = [
        customer_dim,
        product_dim,
        currency_dim,
        orders_fact,
        order_item_fact,
        currency_conversion_fact,
    ]

    model = StarSchemaModel.from_list(table_specs)
    model.generate_all_datasets(print_progress=True)
    model.to_csv(data_path)

    # Emit a dbt schema for each supported warehouse, Postgres first.
    # NOTE(review): both adapters are given the same schema_path - confirm
    # the BigQuery output does not overwrite the Postgres output.
    postgres_adapter = PostgresSchemaAdapter(model)
    postgres_adapter.to_dbt_schema(path=schema_path)

    bigquery_adapter = BigquerySchemaAdapter(model)
    bigquery_adapter.to_dbt_schema(path=schema_path)

    print("Done")