# Finally, each part is assigned back to the row dictionary. The caller can
# then access the parts, as the row is a reference to the same dict object
row['year'] = timestamp_split[0]
row['month'] = timestamp_split[1]
row['day'] = timestamp_split[2]

# The location dimension is filled with data from the CSV file, as the file
# contains information for both columns in the table. If the dimension were
# filled from the sales database instead, the region attribute would have to
# be updated with data from the CSV file later anyway.

# To perform the insertion, Dimension.insert() is used, which inserts a row
# into the table, and the connection wrapper is asked to commit so that the
# data is present in the database and the keys can be looked up when the
# fact table is loaded
[location_dimension.insert(row) for row in region_source]

# The file handle for the CSV file can then be closed
region_file_handle.close()

# As all the information needed for the other dimensions is stored in the
# sales database, we can loop through all of its rows, split the timestamp
# into its three parts, and look up the three dimension keys needed for the
# fact table while letting pygrametl update each dimension with the
# necessary data using Dimension.ensure(). This method combines a lookup
# with an insertion, so a new row is only inserted into the dimension or
# fact table if it does not already exist.
for row in sales_source:
    # The timestamp is split into its three parts
    split_timestamp(row)
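    # NOTE: A hedged sketch of how this loop presumably continues, based on
    # the comment above: Dimension.ensure() combines lookup and insertion and
    # returns the dimension key, which is stored in the row so the fact
    # table's foreign keys are in place. The names book_dimension,
    # time_dimension, location_dimension, and fact_table are assumed from the
    # surrounding example, not taken from this part of the source.
    row['bookid'] = book_dimension.ensure(row)
    row['timeid'] = time_dimension.ensure(row)
    row['locationid'] = location_dimension.ensure(row)

    # With all three dimension keys in place, the fact row can be inserted
    fact_table.insert(row)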
# Imports required by this snippet
import os
import sqlite3
import time

import pygrametl
from pygrametl.tables import Dimension, FactTable


def setup(dbname, number):
    # Create or truncate the database file so each run starts from an empty DW
    dbfile = os.path.expanduser(dbname)
    open(dbfile, 'w').close()
    output_conn = sqlite3.connect(dbfile)
    output_cur = output_conn.cursor()
    output_cur.execute("CREATE TABLE dim1 "
                       "(key1 INTEGER PRIMARY KEY, attr1 INTEGER, "
                       "attr2 INTEGER)")
    output_cur.execute("CREATE TABLE dim2 "
                       "(key2 INTEGER PRIMARY KEY, attr3 INTEGER, "
                       "attr4 INTEGER)")
    output_cur.execute("CREATE TABLE ft1 "
                       "(measure INTEGER, key1 INTEGER, key2 INTEGER, "
                       "PRIMARY KEY(key1, key2))")
    output_conn.commit()

    # The ConnectionWrapper becomes the default target connection used by the
    # Dimension and FactTable instances below
    output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

    dim1 = Dimension(
        name='dim1',
        key='key1',
        attributes=['attr1', 'attr2'])

    dim2 = Dimension(
        name='dim2',
        key='key2',
        attributes=['attr3', 'attr4'])

    ft1 = FactTable(
        name='ft1',
        keyrefs=['key1', 'key2'],
        measures=['measure'])

    facts = []
    dim1_data = []
    dim2_data = []
    total_elapsed = 0.0

    print('Generating ft1 data')
    start = time.monotonic()
    counter = 0
    for i in range(1, number + 1):
        for j in range(1, number + 1):
            counter += 10
            facts.append({'key1': i, 'key2': j, 'measure': counter})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim1 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim1_data.append({'attr1': i, 'attr2': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim2 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim2_data.append({'attr3': i, 'attr4': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into ft1')
    start = time.monotonic()
    for row in facts:
        ft1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim1')
    start = time.monotonic()
    for row in dim1_data:
        dim1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim2')
    start = time.monotonic()
    for row in dim2_data:
        dim2.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('DW populated')
    print('Total time: {}{}\n'.format(round(total_elapsed, 3), 's'))
    output_conn.commit()
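# A minimal usage sketch (not part of the original benchmark); the file name
# and size are arbitrary examples. Note that the number of generated fact
# rows grows quadratically with `number`, so number=1000 already yields
# 1,000,000 facts.
if __name__ == '__main__':
    setup('benchmark_dw.db', 100)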
# Test data for dim1; attr1 deliberately contains NULL values
data = [
    {'attr1': None}, {'attr1': 15}, {'attr1': None}, {'attr1': None},
    {'attr1': 1}, {'attr1': 25}, {'attr1': None}, {'attr1': 5},
    {'attr1': None}, {'attr1': None}
]

# null_conn is assumed to be the database connection created earlier; the
# ConnectionWrapper becomes the default target connection used by dim1
wrapper = pygrametl.ConnectionWrapper(connection=null_conn)

dim1 = Dimension(
    name='dim1',
    key='key1',
    attributes=['attr1'])

for row in data:
    dim1.insert(row)

# Wrap the dimension in a representation of the data warehouse and run the
# predicate, which checks the columns of dim1 for NULL values
dim_rep = DimRepresentation(dim1, null_conn)
notnull_tester = ColumnNotNullPredicate('dim1')
null_rep = DWRepresentation([dim_rep], null_conn)
print(notnull_tester.run(null_rep))
null_conn.close()
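# To make the intent of ColumnNotNullPredicate concrete, here is a small
# self-contained sketch of the same kind of check using only sqlite3: count
# the NULLs in a column and treat any hit as a failure. This illustrates the
# idea only and is not the predicate's actual implementation.
import sqlite3

check_conn = sqlite3.connect(':memory:')
check_conn.execute("CREATE TABLE dim1 "
                   "(key1 INTEGER PRIMARY KEY, attr1 INTEGER)")
check_conn.executemany("INSERT INTO dim1(attr1) VALUES (?)",
                       [(None,), (15,), (None,), (1,), (25,)])
nulls = check_conn.execute(
    "SELECT COUNT(*) FROM dim1 WHERE attr1 IS NULL").fetchone()[0]
print(nulls == 0)  # False: attr1 contains NULLs, so a NOT NULL check fails
check_conn.close()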
    attributes=['city', 'region'],
    lookupatts=['city'])

fact_table = FactTable(
    name='factTable',
    keyrefs=['bookid', 'locationid', 'timeid'],
    measures=['sale'])

# NOTE: Most of the following code is taken directly from pygrametl.org and
# has little to do with making this example portable to SQLite or with this
# particular implementation. It is, however, down here that the cool stuff
# happens, i.e. the actual ETL.

# Python function needed to split the timestamp into its three parts

# We put the regions into our location_dimension
[location_dimension.insert(row) for row in region_source]
csv_file_handle.close()

# Here are all of our steps. They perform the transformations necessary to
# load our data into the DW. Each row from the source is processed through
# the step chain one at a time.

# Beginning of the chain, which iterates over the source rows
step_starter = SourceStep(sales_source)

# Calls the split_timestamp function on each row
time_splitter = Step(split_timestamp)

# These steps ensure that an entry matching the row exists in the given
# dimension, inserting it if it does not. Afterwards, the primary key of
# that entry is added to the row.
ensure_book = DimensionStep(dimension=book_dimension, keyfield='bookid')
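# Presumably the chain continues with analogous steps for the two remaining
# dimensions. The sketch below simply mirrors the ensure_book step above,
# using the keyfields declared in the fact table's keyrefs; the names
# time_dimension and location_dimension are assumed from the rest of the
# example, and wiring the steps together and inserting the facts is omitted
# here.
ensure_time = DimensionStep(dimension=time_dimension, keyfield='timeid')
ensure_location = DimensionStep(dimension=location_dimension,
                                keyfield='locationid')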