Example #1
0
    def load_cdr(self):
        call_dimension = Dimension(
            name='"DimCall"',
            key='uuid',
            attributes=[
                "SipFromUser", 'SipToUser', 'HangupCause', 'Direction',
                'SwitchName', 'CallerContext', 'IsAnswered', 'Duration',
                'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId',
                'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection',
                'HangupDisposition', 'AgentAnswered', 'IsQueued',
                'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName',
                'ExtraData'
            ])

        call_fact_table = FactTable(
            name='"FactCall"',
            keyrefs=[
                'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId',
                'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId',
                'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
                'HangupTimeDimId'
            ],
            measures=[
                'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
                'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
                'WaitSec', 'ProgressMediaSec', 'FlowBillSec'
            ])

        self.row['uuid'] = call_dimension.ensure(self.row)
        call_fact_table.insert(self.row)
Example #2
0
    def load_tickets(self):
        ticket_dimension = Dimension(
            name='"DimTicket"',
            key='ticket_id',
            attributes=[
                'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
                'description', 'priority', 'requester', 'submitter',
                'collaborators', 'watchers', 'attachments', 'sub_tickets',
                'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
                'comments', 'tags', 'ticket_type', 'status_type', 'channel',
                'company', 'tenant', 'assignee', 'assignee_group',
                'security_level', 'event_author'
            ])

        ticket_fact_table = FactTable(
            name='"FactTicket"',
            keyrefs=[
                'engagement_session_id', 'ticket_id', 'created_at_dim_id',
                'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id',
                'updated_time_dim_id', 'due_time_dim_id'
            ],
            measures=[
                'created_at', 'updated_at', 'due_at', 'time_estimation',
                're_opens', 'waited_time', 'worked_time', 'resolution_time'
            ])

        # Updates the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        self.row['ticket_id'] = ticket_dimension.ensure(self.row)
        ticket_fact_table.insert(self.row)
Example #3
0
    def load_engagement(self):
        engagement_dimension = Dimension(
            name='"DimEngagement"',
            key='engagement_id',
            attributes=['profile', 'channel_from', 'channel_to', 'company', 'has_profile', 'tenant',  'notes', 'channel'])

        engagement_fact_table = FactTable(
            name='"FactEngagement"',
            keyrefs=['engagement_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['updated_at', 'created_at'])

        self.row['eng_dim_id'] = engagement_dimension.ensure(self.row)
        engagement_fact_table.insert(self.row)
        return True
Example #4
0
    def load_ticket_events(self):
        ticket_event_dimension = Dimension(
            name='"DimTicketEvent"',
            key='ticket_event_id',
            attributes=['event_type', 'event_author', 'event_body'])

        ticket_event_fact_table = FactTable(
            name='"FactTicketEvent"',
            keyrefs=['ticket_id', 'ticket_event_id'],
            measures=['event_created_at_dim_id', 'event_created_at'])

        # Update the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        ticket_event_dimension.ensure(self.row)
        ticket_event_fact_table.insert(self.row)
Example #5
0
    def load_external_users(self):

        external_users_dimension = Dimension(
            name='"DimExternalUsers"',
            key='external_user_id',
            attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name',
                        'locale', 'ssn', 'password',  'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin',
                        'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags',
                        'contacts', 'zipcode', 'address_number', 'city', 'province', 'country'])

        external_users_fact_table = FactTable(
            name='"FactExternalUsers"',
            keyrefs=['external_user_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['created_at', 'updated_at'])

        self.row['external_user_id'] = external_users_dimension.ensure(self.row)
        external_users_fact_table.insert(self.row)
Example #6
0
time_dimension = Dimension(
    name='time',
    key='timeid',
    attributes=['day', 'month', 'year'])

location_dimension = Dimension(
    name='location',
    key='locationid',
    attributes=['city', 'region'],
    lookupatts=['city'])

# A single instance of FactTable is created for the data warehouse's
# fact table, with the name of the table, a list of attributes constituting
# the primary key of the fact table, and lastly, the list of measures.
fact_table = FactTable(
    name='facttable',
    keyrefs=['bookid', 'locationid', 'timeid'],
    measures=['sale'])

# A normal Python function is used to split the timestamp into its parts
def split_timestamp(row):
    """Splits a timestamp containing a date into its three parts"""

    # First the timestamp is extracted from the row dictionary
    timestamp = row['timestamp']

    # Then the string is split on the / in the time stamp
    timestamp_split = timestamp.split('/')

    # Finally each part is reassigned to the row dictionary. It can then be
    # accessed by the caller as the row is a reference to the dict object
    row['year'] = timestamp_split[0]
Example #7
0
""" A sample pygrametl program
"""

__author__ = 'Mathias Claus Jensen'

import pygrametl
from pygrametl.datasources import SQLSource
from pygrametl.tables import Dimension, FactTable
import sqlite3

input_conn = sqlite3.connect('input.db')
output_conn = sqlite3.connect('output.db')

input_src = SQLSource(input_conn, query='SELECT * dim1')
output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

dim1 = Dimension(name='dim1', key='key1', attributes=['attr1', 'attr2'])

dim1 = Dimension(name='dim2', key='key2', attributes=['attr3', 'attr4'])

ft1 = FactTable(name='ft1', keyrefs=[
    'key1',
])

input_conn.close()
output_conn.close()
Example #8
0
 def cls_to_pygram_fact(cls, schema_name):
     fct = FactTable(name=schema_name + '.' + cls.cls_get_name(),
                     keyrefs=cls.cls_get_key_names(),
                     measures=cls.cls_get_measure_names())
     return fct
Example #9
0
company_info = [('The Hobbit', 1), ('The Bobbit', 5)]

# ... and inserting the necessary data.
c.executemany("INSERT INTO FACTTABLE (BOOK,ISSUE) VALUES (?,?)", company_info)

conn.commit()

ConnectionWrapper(conn)

dim = Dimension('COMPANY', 'ID', ['NAME', 'AGE', 'ADDRESS', 'SALARY'], ['NAME'])
dim_rep = DimRepresentation(dim, conn)

dim2 = Dimension('LASTNAME', 'NAME', ['LAST'])
dim_rep2 = DimRepresentation(dim2, conn)

ft = FactTable('FACTTABLE', ['Book'], ['Issue'])
ft_rep = FTRepresentation(ft, conn)

dw = DWRepresentation([dim_rep, ft_rep, dim_rep2], conn)

expected_list1 = [
    {'NAME': 'Anders', 'AGE': 43, 'SALARY': 21000.0, 'ADDRESS': 'Denmark',
     'ID': 1},
    {'NAME': 'Charles', 'AGE': 50, 'SALARY': 25000.0, 'ADDRESS': 'Texas',
     'ID': 2},
    {'NAME': 'Wolf', 'AGE': 28, 'SALARY': 19000.0, 'ADDRESS': 'Sweden',
     'ID': 3},
    {'NAME': 'Hannibal', 'AGE': 45, 'SALARY': 65000.0, 'ADDRESS': 'America',
     'ID': 4},
    {'NAME': 'Buggy', 'AGE': 67, 'SALARY': 2000.0, 'ADDRESS': 'America',
     'ID': 5}
Example #10
0
    def __create_tables(self):

        # Systems
        self.system_dimension = CachedDimension(name='system',
                                                key='system_id',
                                                attributes=['system_name'],
                                                lookupatts=['system_name'])

        # Stations

        self.start_station_dimension = CachedDimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander)

        self.end_station_dimension = CachedDimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander)

        # Trip dates and times

        self.date_dimension = CachedDimension(
            name='bdate',
            key='date_id',
            attributes=['year', 'month', 'day', 'day_of_week', 'date_string'],
            lookupatts=['date_string'],
            rowexpander=date_row_expander)

        self.time_dimension = CachedDimension(
            name='btime',
            key='time_id',
            attributes=['hour', 'minute', 'time_string', 'time_of_day'],
            lookupatts=['time_string'],
            rowexpander=time_row_expander)

        # Trips

        self.trip_fact_table = FactTable(
            name='trips',
            measures=['duration_s'],
            keyrefs=[
                'system_id', 'start_station_id', 'end_station_id', 'date_id',
                'time_id', 'customer_birthyear_id', 'customer_gender_id',
                'customer_type_id', 'bike_id', 'trip_category_id'
            ])

        # weather fact table and date dimension

        self.weather_fact_table = FactTable(name='weather',
                                            measures=[
                                                'precipitation_in', 'snow_in',
                                                'temp_avg_f', 'temp_min_f',
                                                'temp_max_f', 'wind_mph'
                                            ],
                                            keyrefs=['system_id', 'date_id'])

        self.trip_category = CachedDimension(name='trip_category',
                                             key='trip_category_id',
                                             attributes=['trip_category'])

        self.bike_dimension = CachedDimension(
            name='bikes',
            key='bike_id',
            attributes=['system_id', 'bike_name'],
            lookupatts=['system_id', 'bike_name'],
            defaultidvalue=-1)

        self.customer_gender_dimension = CachedDimension(
            name='customer_gender',
            key='customer_gender_id',
            attributes=['customer_gender'],
            lookupatts=['customer_gender'])

        self.customer_birthyear_dimension = CachedDimension(
            name='customer_birthyear',
            key='customer_birthyear_id',
            attributes=['customer_birthyear'],
            lookupatts=['customer_birthyear'])

        self.customer_type_dimension = CachedDimension(
            name='customer_type',
            key='customer_type_id',
            attributes=['customer_type'],
            lookupatts=['customer_type'])

        # Station status
        self.station_status_fact_table = FactTable(
            name='station_status',
            keyrefs=['system_id', 'start_station_id', 'date_id', 'time_id'],
            measures=['bikes_available', 'docks_available'])

        # Non-cached version of stations for use only with updating Indego stations.

        self.start_station_noncached_dimension = Dimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander,
            defaultidvalue=-1)

        self.end_station_noncached_dimension = Dimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander,
            defaultidvalue=-1)