Ejemplo n.º 1
0
    def load_cdr(self):
        call_dimension = Dimension(
            name='"DimCall"',
            key='uuid',
            attributes=[
                "SipFromUser", 'SipToUser', 'HangupCause', 'Direction',
                'SwitchName', 'CallerContext', 'IsAnswered', 'Duration',
                'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId',
                'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection',
                'HangupDisposition', 'AgentAnswered', 'IsQueued',
                'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName',
                'ExtraData'
            ])

        call_fact_table = FactTable(
            name='"FactCall"',
            keyrefs=[
                'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId',
                'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId',
                'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
                'HangupTimeDimId'
            ],
            measures=[
                'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
                'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
                'WaitSec', 'ProgressMediaSec', 'FlowBillSec'
            ])

        self.row['uuid'] = call_dimension.ensure(self.row)
        call_fact_table.insert(self.row)
Ejemplo n.º 2
0
    def load_tickets(self):
        ticket_dimension = Dimension(
            name='"DimTicket"',
            key='ticket_id',
            attributes=[
                'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
                'description', 'priority', 'requester', 'submitter',
                'collaborators', 'watchers', 'attachments', 'sub_tickets',
                'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
                'comments', 'tags', 'ticket_type', 'status_type', 'channel',
                'company', 'tenant', 'assignee', 'assignee_group',
                'security_level', 'event_author'
            ])

        ticket_fact_table = FactTable(
            name='"FactTicket"',
            keyrefs=[
                'engagement_session_id', 'ticket_id', 'created_at_dim_id',
                'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id',
                'updated_time_dim_id', 'due_time_dim_id'
            ],
            measures=[
                'created_at', 'updated_at', 'due_at', 'time_estimation',
                're_opens', 'waited_time', 'worked_time', 'resolution_time'
            ])

        # Updates the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        self.row['ticket_id'] = ticket_dimension.ensure(self.row)
        ticket_fact_table.insert(self.row)
Ejemplo n.º 3
0
    def load_originated_legs(self):

        self.row2["uuid"] = self.uuid
        self.row2["CallUuid"] = self.call_uuid
        self.row2["MemberUuid"] = self.member_uuid
        self.row2["IsTransferLeg"] = self.is_transfer_leg
        self.row2["Direction"] = self.direction
        self.row2["ObjType"] = self.op_cat
        self.row2["originated_leg"] = self.originatedLegsList

        originated_legs_dimension = Dimension(
            name='"DimOriginatedLegs"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        originated_legs_dimension_temp = Dimension(
            name='"DimOriginatedLegsTemp"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        self.row2['uuid'] = originated_legs_dimension.ensure(self.row2)
        self.row2['uuid'] = originated_legs_dimension_temp.ensure(self.row2)
Ejemplo n.º 4
0
    def load_engagement(self):
        engagement_dimension = Dimension(
            name='"DimEngagement"',
            key='engagement_id',
            attributes=['profile', 'channel_from', 'channel_to', 'company', 'has_profile', 'tenant',  'notes', 'channel'])

        engagement_fact_table = FactTable(
            name='"FactEngagement"',
            keyrefs=['engagement_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['updated_at', 'created_at'])

        self.row['eng_dim_id'] = engagement_dimension.ensure(self.row)
        engagement_fact_table.insert(self.row)
        return True
Ejemplo n.º 5
0
    def load_ticket_events(self):
        ticket_event_dimension = Dimension(
            name='"DimTicketEvent"',
            key='ticket_event_id',
            attributes=['event_type', 'event_author', 'event_body'])

        ticket_event_fact_table = FactTable(
            name='"FactTicketEvent"',
            keyrefs=['ticket_id', 'ticket_event_id'],
            measures=['event_created_at_dim_id', 'event_created_at'])

        # Update the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        ticket_event_dimension.ensure(self.row)
        ticket_event_fact_table.insert(self.row)
Ejemplo n.º 6
0
    def cls_to_pygram_dim(cls, schema_name, lookup_fields=[]):
        # cls.cls_init_cols()
        # if not lookup_fields:

        lookup_fields = cls.cls_get_lookup_fields()
        if lookup_fields:
            dim = CachedDimension(name=schema_name + '.' + cls.cls_get_name(),
                                  key='id',
                                  attributes=cls.cls_get_column_names_no_id(),
                                  lookupatts=lookup_fields,
                                  cachefullrows=True)
        else:
            dim = Dimension(name=schema_name + '.' + cls.cls_get_name(),
                            key='id',
                            attributes=cls.cls_get_column_names_no_id())
        return dim
Ejemplo n.º 7
0
    def load_external_users(self):

        external_users_dimension = Dimension(
            name='"DimExternalUsers"',
            key='external_user_id',
            attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name',
                        'locale', 'ssn', 'password',  'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin',
                        'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags',
                        'contacts', 'zipcode', 'address_number', 'city', 'province', 'country'])

        external_users_fact_table = FactTable(
            name='"FactExternalUsers"',
            keyrefs=['external_user_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['created_at', 'updated_at'])

        self.row['external_user_id'] = external_users_dimension.ensure(self.row)
        external_users_fact_table.insert(self.row)
Ejemplo n.º 8
0
}, {
    'attr1': 75,
    'attr2': 75,
    'attr3': 75,
    'attr4': 75
}, {
    'attr1': 76,
    'attr2': 76,
    'attr3': 76,
    'attr4': 76
}]

wrapper = pygrametl.ConnectionWrapper(connection=conn)

dim1 = Dimension(name='dim1',
                 key='key1',
                 attributes=['attr1', 'key2', 'key3'],
                 lookupatts=['attr1'])

dim2 = Dimension(name='dim2',
                 key='key2',
                 attributes=['attr2', 'key4'],
                 lookupatts=['attr2'])

dim3 = Dimension(name='dim3', key='key3', attributes=['attr3'])

dim4 = Dimension(name='dim4', key='key4', attributes=['attr4'])

special_snowflake = SnowflakedDimension(references=[(dim1,
                                                     [dim2, dim3]), (dim2,
                                                                     dim4)])
Ejemplo n.º 9
0
}, {
    'attr1': 25
}, {
    'attr1': None
}, {
    'attr1': 5
}, {
    'attr1': None
}, {
    'attr1': None
}]

wrapper = pygrametl.ConnectionWrapper(connection=null_conn)

dim1 = Dimension(
    name='dim1',
    key='key1',
    attributes=['attr1'],
)

for row in data:
    dim1.insert(row)

dim_rep = DimRepresentation(dim1, null_conn)
notnull_tester = ColumnNotNullPredicate('dim1')
null_rep = DWRepresentation([dim_rep], null_conn)

print(notnull_tester.run(null_rep))

null_conn.close()
Ejemplo n.º 10
0
# handle to the file, as pygrametl uses Pythons DictReader for CSV files,
# and the header of the CSV file contains information about each column.
region_file_handle = open('c:\\work\\python\\region.csv', 'r', 16384)
region_source = CSVSource(f=region_file_handle, delimiter=',')

# An instance of Dimension is created for each dimension in the data
# warehouse. For each table, the name of the table, the primary key of
# the table, and a list of non key attributes in the table, are added.
# In addition, for the location dimension we specify which attributes
# should be used for a lookup of the primary key, as only the city is
# present in the sales database and is enough to perform a lookup of
# a unique primary key. As mentioned in the beginning of the guide, using
# named parameters is strongly encouraged.

book_dimension = Dimension(
    name='book',
    key='bookid',
    attributes=['book', 'genre'])

time_dimension = Dimension(
    name='time',
    key='timeid',
    attributes=['day', 'month', 'year'])

location_dimension = Dimension(
    name='location',
    key='locationid',
    attributes=['city', 'region'],
    lookupatts=['city'])

# A single instance of FactTable is created for the data warehouse's
# fact table, with the name of the table, a list of attributes constituting
Ejemplo n.º 11
0
def convert_FullDate(row):
    # Convert FullDate to datetime
    return datetime.strptime(row['FullDate'], '%d/%m/%Y')


def key_finder(row, namemapping):
    # Assign the primary key of DimDate
    return row[namemapping['FullDate']]


# Create dimension and fact table abstractions for use in the ETL flow
DimDate = Dimension(
    name='DimDate',
    key='DateKey',
    attributes=['FullDate', 'DateName', 'DayOfWeek', 'DayNameOfWeek', 'DayOfMonth', 'DayOfYear', 'WeekdayWeekend', \
                'WeekOfYear', 'MonthName', 'MonthOfYear', 'IsLastDayOfMonth', 'CalendarQuarter', 'CalendarYear', \
                'CalendarYearMonth', 'CalendarYearQtr', 'FiscalMonthOfYear', 'FiscalQuarter', 'FiscalYear', \
                'FiscalYearMonth', 'FiscalYearQtr'],
    lookupatts=['FullDate'],
    idfinder=key_finder
)

DimGroup = TypeOneSlowlyChangingDimension(
    name='DimGroup',
    key='GroupKey',
    attributes=['GroupId', 'GroupName', 'Version'],
    lookupatts=['GroupId'])

DimGroupCategory = Dimension(
    name='DimGroupCategory',
    key='GroupCategoryKey',
    attributes=['GroupCategoryId', 'GroupCategoryName'],
Ejemplo n.º 12
0
""" A sample pygrametl program
"""

__author__ = 'Mathias Claus Jensen'

import pygrametl
from pygrametl.datasources import SQLSource
from pygrametl.tables import Dimension, FactTable
import sqlite3

input_conn = sqlite3.connect('input.db')
output_conn = sqlite3.connect('output.db')

input_src = SQLSource(input_conn, query='SELECT * dim1')
output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

dim1 = Dimension(name='dim1', key='key1', attributes=['attr1', 'attr2'])

dim1 = Dimension(name='dim2', key='key2', attributes=['attr3', 'attr4'])

ft1 = FactTable(name='ft1', keyrefs=[
    'key1',
])

input_conn.close()
output_conn.close()
Ejemplo n.º 13
0
# Making table to test on...
c.execute('''CREATE TABLE FACTTABLE
    (Book           TEXT   NOT NULL,
    Issue            INT    )''')

company_info = [('The Hobbit', 1), ('The Bobbit', 5)]

# ... and inserting the necessary data.
c.executemany("INSERT INTO FACTTABLE (BOOK,ISSUE) VALUES (?,?)", company_info)

conn.commit()

ConnectionWrapper(conn)

dim = Dimension('COMPANY', 'ID', ['NAME', 'AGE', 'ADDRESS', 'SALARY'], ['NAME'])
dim_rep = DimRepresentation(dim, conn)

dim2 = Dimension('LASTNAME', 'NAME', ['LAST'])
dim_rep2 = DimRepresentation(dim2, conn)

ft = FactTable('FACTTABLE', ['Book'], ['Issue'])
ft_rep = FTRepresentation(ft, conn)

dw = DWRepresentation([dim_rep, ft_rep, dim_rep2], conn)

expected_list1 = [
    {'NAME': 'Anders', 'AGE': 43, 'SALARY': 21000.0, 'ADDRESS': 'Denmark',
     'ID': 1},
    {'NAME': 'Charles', 'AGE': 50, 'SALARY': 25000.0, 'ADDRESS': 'Texas',
     'ID': 2},
Ejemplo n.º 14
0
    SALARY         REAL);''')

company_info = [('Anders', 43, 'Denmark', 21000.00),
                ('Charles', 50, 'Texas', 25000.00),
                ('Wolf', 28, 'Sweden', 19000.00),
                ('Hannibal', 45, 'America', 65000.00),
                ('Hannibal', 45, 'America', 65000.00),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000)]

# ... and inserting the necessary data.
c.executemany("INSERT INTO COMPANY (NAME,AGE,ADDRESS,SALARY) VALUES (?,?,?,?)",
              company_info)
conn.commit()
columns = ['NAME', 'AGE', 'ADDRESS', 'SALARY']

wrapper = ConnectionWrapper(conn)
dim = Dimension(name='COMPANY',
                key='ID',
                attributes=['NAME', 'AGE', 'ADDRESS', 'SALARY'],
                lookupatts=['NAME'])

dim_rep = DimRepresentation(dim, conn)
dw = DWRepresentation([dim_rep], conn)

dup_predicate = DuplicatePredicate('company', ['ID'], True)
print(dup_predicate.run(dw))

conn.close()
Ejemplo n.º 15
0
    def __create_tables(self):

        # Systems
        self.system_dimension = CachedDimension(name='system',
                                                key='system_id',
                                                attributes=['system_name'],
                                                lookupatts=['system_name'])

        # Stations

        self.start_station_dimension = CachedDimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander)

        self.end_station_dimension = CachedDimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander)

        # Trip dates and times

        self.date_dimension = CachedDimension(
            name='bdate',
            key='date_id',
            attributes=['year', 'month', 'day', 'day_of_week', 'date_string'],
            lookupatts=['date_string'],
            rowexpander=date_row_expander)

        self.time_dimension = CachedDimension(
            name='btime',
            key='time_id',
            attributes=['hour', 'minute', 'time_string', 'time_of_day'],
            lookupatts=['time_string'],
            rowexpander=time_row_expander)

        # Trips

        self.trip_fact_table = FactTable(
            name='trips',
            measures=['duration_s'],
            keyrefs=[
                'system_id', 'start_station_id', 'end_station_id', 'date_id',
                'time_id', 'customer_birthyear_id', 'customer_gender_id',
                'customer_type_id', 'bike_id', 'trip_category_id'
            ])

        # weather fact table and date dimension

        self.weather_fact_table = FactTable(name='weather',
                                            measures=[
                                                'precipitation_in', 'snow_in',
                                                'temp_avg_f', 'temp_min_f',
                                                'temp_max_f', 'wind_mph'
                                            ],
                                            keyrefs=['system_id', 'date_id'])

        self.trip_category = CachedDimension(name='trip_category',
                                             key='trip_category_id',
                                             attributes=['trip_category'])

        self.bike_dimension = CachedDimension(
            name='bikes',
            key='bike_id',
            attributes=['system_id', 'bike_name'],
            lookupatts=['system_id', 'bike_name'],
            defaultidvalue=-1)

        self.customer_gender_dimension = CachedDimension(
            name='customer_gender',
            key='customer_gender_id',
            attributes=['customer_gender'],
            lookupatts=['customer_gender'])

        self.customer_birthyear_dimension = CachedDimension(
            name='customer_birthyear',
            key='customer_birthyear_id',
            attributes=['customer_birthyear'],
            lookupatts=['customer_birthyear'])

        self.customer_type_dimension = CachedDimension(
            name='customer_type',
            key='customer_type_id',
            attributes=['customer_type'],
            lookupatts=['customer_type'])

        # Station status
        self.station_status_fact_table = FactTable(
            name='station_status',
            keyrefs=['system_id', 'start_station_id', 'date_id', 'time_id'],
            measures=['bikes_available', 'docks_available'])

        # Non-cached version of stations for use only with updating Indego stations.

        self.start_station_noncached_dimension = Dimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander,
            defaultidvalue=-1)

        self.end_station_noncached_dimension = Dimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander,
            defaultidvalue=-1)