def load_cdr(self): call_dimension = Dimension( name='"DimCall"', key='uuid', attributes=[ "SipFromUser", 'SipToUser', 'HangupCause', 'Direction', 'SwitchName', 'CallerContext', 'IsAnswered', 'Duration', 'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId', 'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection', 'HangupDisposition', 'AgentAnswered', 'IsQueued', 'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName', 'ExtraData' ]) call_fact_table = FactTable( name='"FactCall"', keyrefs=[ 'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId', 'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId', 'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId', 'HangupTimeDimId' ], measures=[ 'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime', 'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec', 'WaitSec', 'ProgressMediaSec', 'FlowBillSec' ]) self.row['uuid'] = call_dimension.ensure(self.row) call_fact_table.insert(self.row)
def load_tickets(self): ticket_dimension = Dimension( name='"DimTicket"', key='ticket_id', attributes=[ 'active', 'tid', 'is_sub_ticket', 'subject', 'reference', 'description', 'priority', 'requester', 'submitter', 'collaborators', 'watchers', 'attachments', 'sub_tickets', 'related_tickets', 'merged_tickets', 'isolated_tags', 'sla', 'comments', 'tags', 'ticket_type', 'status_type', 'channel', 'company', 'tenant', 'assignee', 'assignee_group', 'security_level', 'event_author' ]) ticket_fact_table = FactTable( name='"FactTicket"', keyrefs=[ 'engagement_session_id', 'ticket_id', 'created_at_dim_id', 'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id', 'due_time_dim_id' ], measures=[ 'created_at', 'updated_at', 'due_at', 'time_estimation', 're_opens', 'waited_time', 'worked_time', 'resolution_time' ]) # Updates the row with the primary keys of each dimension while at the same time inserting new data into # each dimension self.row['ticket_id'] = ticket_dimension.ensure(self.row) ticket_fact_table.insert(self.row)
def load_engagement(self): engagement_dimension = Dimension( name='"DimEngagement"', key='engagement_id', attributes=['profile', 'channel_from', 'channel_to', 'company', 'has_profile', 'tenant', 'notes', 'channel']) engagement_fact_table = FactTable( name='"FactEngagement"', keyrefs=['engagement_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'], measures=['updated_at', 'created_at']) self.row['eng_dim_id'] = engagement_dimension.ensure(self.row) engagement_fact_table.insert(self.row) return True
def load_ticket_events(self): ticket_event_dimension = Dimension( name='"DimTicketEvent"', key='ticket_event_id', attributes=['event_type', 'event_author', 'event_body']) ticket_event_fact_table = FactTable( name='"FactTicketEvent"', keyrefs=['ticket_id', 'ticket_event_id'], measures=['event_created_at_dim_id', 'event_created_at']) # Update the row with the primary keys of each dimension while at the same time inserting new data into # each dimension ticket_event_dimension.ensure(self.row) ticket_event_fact_table.insert(self.row)
def load_external_users(self): external_users_dimension = Dimension( name='"DimExternalUsers"', key='external_user_id', attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name', 'locale', 'ssn', 'password', 'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin', 'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags', 'contacts', 'zipcode', 'address_number', 'city', 'province', 'country']) external_users_fact_table = FactTable( name='"FactExternalUsers"', keyrefs=['external_user_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'], measures=['created_at', 'updated_at']) self.row['external_user_id'] = external_users_dimension.ensure(self.row) external_users_fact_table.insert(self.row)
time_dimension = Dimension( name='time', key='timeid', attributes=['day', 'month', 'year']) location_dimension = Dimension( name='location', key='locationid', attributes=['city', 'region'], lookupatts=['city']) # A single instance of FactTable is created for the data warehouse's # fact table, with the name of the table, a list of attributes constituting # the primary key of the fact table, and lastly, the list of measures. fact_table = FactTable( name='facttable', keyrefs=['bookid', 'locationid', 'timeid'], measures=['sale']) # A normal Python function is used to split the timestamp into its parts def split_timestamp(row): """Splits a timestamp containing a date into its three parts""" # First the timestamp is extracted from the row dictionary timestamp = row['timestamp'] # Then the string is split on the / in the time stamp timestamp_split = timestamp.split('/') # Finally each part is reassigned to the row dictionary. It can then be # accessed by the caller as the row is a reference to the dict object row['year'] = timestamp_split[0]
""" A sample pygrametl program """ __author__ = 'Mathias Claus Jensen' import pygrametl from pygrametl.datasources import SQLSource from pygrametl.tables import Dimension, FactTable import sqlite3 input_conn = sqlite3.connect('input.db') output_conn = sqlite3.connect('output.db') input_src = SQLSource(input_conn, query='SELECT * dim1') output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn) dim1 = Dimension(name='dim1', key='key1', attributes=['attr1', 'attr2']) dim1 = Dimension(name='dim2', key='key2', attributes=['attr3', 'attr4']) ft1 = FactTable(name='ft1', keyrefs=[ 'key1', ]) input_conn.close() output_conn.close()
def cls_to_pygram_fact(cls, schema_name): fct = FactTable(name=schema_name + '.' + cls.cls_get_name(), keyrefs=cls.cls_get_key_names(), measures=cls.cls_get_measure_names()) return fct
company_info = [('The Hobbit', 1), ('The Bobbit', 5)] # ... and inserting the necessary data. c.executemany("INSERT INTO FACTTABLE (BOOK,ISSUE) VALUES (?,?)", company_info) conn.commit() ConnectionWrapper(conn) dim = Dimension('COMPANY', 'ID', ['NAME', 'AGE', 'ADDRESS', 'SALARY'], ['NAME']) dim_rep = DimRepresentation(dim, conn) dim2 = Dimension('LASTNAME', 'NAME', ['LAST']) dim_rep2 = DimRepresentation(dim2, conn) ft = FactTable('FACTTABLE', ['Book'], ['Issue']) ft_rep = FTRepresentation(ft, conn) dw = DWRepresentation([dim_rep, ft_rep, dim_rep2], conn) expected_list1 = [ {'NAME': 'Anders', 'AGE': 43, 'SALARY': 21000.0, 'ADDRESS': 'Denmark', 'ID': 1}, {'NAME': 'Charles', 'AGE': 50, 'SALARY': 25000.0, 'ADDRESS': 'Texas', 'ID': 2}, {'NAME': 'Wolf', 'AGE': 28, 'SALARY': 19000.0, 'ADDRESS': 'Sweden', 'ID': 3}, {'NAME': 'Hannibal', 'AGE': 45, 'SALARY': 65000.0, 'ADDRESS': 'America', 'ID': 4}, {'NAME': 'Buggy', 'AGE': 67, 'SALARY': 2000.0, 'ADDRESS': 'America', 'ID': 5}
def __create_tables(self): # Systems self.system_dimension = CachedDimension(name='system', key='system_id', attributes=['system_name'], lookupatts=['system_name']) # Stations self.start_station_dimension = CachedDimension( name='start_station', key='start_station_id', attributes=[ 'system_id', 'start_station_short_name', 'start_station_name', 'start_station_latitude', 'start_station_longitude', 'start_station_capacity' ], lookupatts=['system_id', 'start_station_short_name'], rowexpander=start_station_missing_data_expander) self.end_station_dimension = CachedDimension( name='end_station', key='end_station_id', attributes=[ 'system_id', 'end_station_short_name', 'end_station_name', 'end_station_latitude', 'end_station_longitude', 'end_station_capacity' ], lookupatts=['system_id', 'end_station_short_name'], rowexpander=end_station_missing_data_expander) # Trip dates and times self.date_dimension = CachedDimension( name='bdate', key='date_id', attributes=['year', 'month', 'day', 'day_of_week', 'date_string'], lookupatts=['date_string'], rowexpander=date_row_expander) self.time_dimension = CachedDimension( name='btime', key='time_id', attributes=['hour', 'minute', 'time_string', 'time_of_day'], lookupatts=['time_string'], rowexpander=time_row_expander) # Trips self.trip_fact_table = FactTable( name='trips', measures=['duration_s'], keyrefs=[ 'system_id', 'start_station_id', 'end_station_id', 'date_id', 'time_id', 'customer_birthyear_id', 'customer_gender_id', 'customer_type_id', 'bike_id', 'trip_category_id' ]) # weather fact table and date dimension self.weather_fact_table = FactTable(name='weather', measures=[ 'precipitation_in', 'snow_in', 'temp_avg_f', 'temp_min_f', 'temp_max_f', 'wind_mph' ], keyrefs=['system_id', 'date_id']) self.trip_category = CachedDimension(name='trip_category', key='trip_category_id', attributes=['trip_category']) self.bike_dimension = CachedDimension( name='bikes', key='bike_id', attributes=['system_id', 'bike_name'], lookupatts=['system_id', 'bike_name'], defaultidvalue=-1) self.customer_gender_dimension = CachedDimension( name='customer_gender', key='customer_gender_id', attributes=['customer_gender'], lookupatts=['customer_gender']) self.customer_birthyear_dimension = CachedDimension( name='customer_birthyear', key='customer_birthyear_id', attributes=['customer_birthyear'], lookupatts=['customer_birthyear']) self.customer_type_dimension = CachedDimension( name='customer_type', key='customer_type_id', attributes=['customer_type'], lookupatts=['customer_type']) # Station status self.station_status_fact_table = FactTable( name='station_status', keyrefs=['system_id', 'start_station_id', 'date_id', 'time_id'], measures=['bikes_available', 'docks_available']) # Non-cached version of stations for use only with updating Indego stations. self.start_station_noncached_dimension = Dimension( name='start_station', key='start_station_id', attributes=[ 'system_id', 'start_station_short_name', 'start_station_name', 'start_station_latitude', 'start_station_longitude', 'start_station_capacity' ], lookupatts=['system_id', 'start_station_short_name'], rowexpander=start_station_missing_data_expander, defaultidvalue=-1) self.end_station_noncached_dimension = Dimension( name='end_station', key='end_station_id', attributes=[ 'system_id', 'end_station_short_name', 'end_station_name', 'end_station_latitude', 'end_station_longitude', 'end_station_capacity' ], lookupatts=['system_id', 'end_station_short_name'], rowexpander=end_station_missing_data_expander, defaultidvalue=-1)