Esempio n. 1
0
    def load_ticket_events(self):
        ticket_event_dimension = Dimension(
            name='"DimTicketEvent"',
            key='ticket_event_id',
            attributes=['event_type', 'event_author', 'event_body'])

        ticket_event_fact_table = FactTable(
            name='"FactTicketEvent"',
            keyrefs=['ticket_id', 'ticket_event_id'],
            measures=['event_created_at_dim_id', 'event_created_at'])

        # Update the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        ticket_event_dimension.ensure(self.row)
        ticket_event_fact_table.insert(self.row)
Esempio n. 2
0
    def load_cdr(self):
        call_dimension = Dimension(
            name='"DimCall"',
            key='uuid',
            attributes=[
                "SipFromUser", 'SipToUser', 'HangupCause', 'Direction',
                'SwitchName', 'CallerContext', 'IsAnswered', 'Duration',
                'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId',
                'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection',
                'HangupDisposition', 'AgentAnswered', 'IsQueued',
                'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName',
                'ExtraData'
            ])

        call_fact_table = FactTable(
            name='"FactCall"',
            keyrefs=[
                'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId',
                'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId',
                'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
                'HangupTimeDimId'
            ],
            measures=[
                'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
                'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
                'WaitSec', 'ProgressMediaSec', 'FlowBillSec'
            ])

        self.row['uuid'] = call_dimension.ensure(self.row)
        call_fact_table.insert(self.row)
Esempio n. 3
0
    def load_tickets(self):
        ticket_dimension = Dimension(
            name='"DimTicket"',
            key='ticket_id',
            attributes=[
                'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
                'description', 'priority', 'requester', 'submitter',
                'collaborators', 'watchers', 'attachments', 'sub_tickets',
                'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
                'comments', 'tags', 'ticket_type', 'status_type', 'channel',
                'company', 'tenant', 'assignee', 'assignee_group',
                'security_level', 'event_author'
            ])

        ticket_fact_table = FactTable(
            name='"FactTicket"',
            keyrefs=[
                'engagement_session_id', 'ticket_id', 'created_at_dim_id',
                'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id',
                'updated_time_dim_id', 'due_time_dim_id'
            ],
            measures=[
                'created_at', 'updated_at', 'due_at', 'time_estimation',
                're_opens', 'waited_time', 'worked_time', 'resolution_time'
            ])

        # Updates the row with the primary keys of each dimension while at the same time inserting new data into
        # each dimension

        self.row['ticket_id'] = ticket_dimension.ensure(self.row)
        ticket_fact_table.insert(self.row)
Esempio n. 4
0
    def load_originated_legs(self):

        self.row2["uuid"] = self.uuid
        self.row2["CallUuid"] = self.call_uuid
        self.row2["MemberUuid"] = self.member_uuid
        self.row2["IsTransferLeg"] = self.is_transfer_leg
        self.row2["Direction"] = self.direction
        self.row2["ObjType"] = self.op_cat
        self.row2["originated_leg"] = self.originatedLegsList

        originated_legs_dimension = Dimension(
            name='"DimOriginatedLegs"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        originated_legs_dimension_temp = Dimension(
            name='"DimOriginatedLegsTemp"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        self.row2['uuid'] = originated_legs_dimension.ensure(self.row2)
        self.row2['uuid'] = originated_legs_dimension_temp.ensure(self.row2)
Esempio n. 5
0
    def load_engagement(self):
        engagement_dimension = Dimension(
            name='"DimEngagement"',
            key='engagement_id',
            attributes=['profile', 'channel_from', 'channel_to', 'company', 'has_profile', 'tenant',  'notes', 'channel'])

        engagement_fact_table = FactTable(
            name='"FactEngagement"',
            keyrefs=['engagement_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['updated_at', 'created_at'])

        self.row['eng_dim_id'] = engagement_dimension.ensure(self.row)
        engagement_fact_table.insert(self.row)
        return True
Esempio n. 6
0
    def load_external_users(self):

        external_users_dimension = Dimension(
            name='"DimExternalUsers"',
            key='external_user_id',
            attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name',
                        'locale', 'ssn', 'password',  'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin',
                        'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags',
                        'contacts', 'zipcode', 'address_number', 'city', 'province', 'country'])

        external_users_fact_table = FactTable(
            name='"FactExternalUsers"',
            keyrefs=['external_user_id', 'created_at_dim_id', 'updated_at_dim_id', 'created_time_dim_id', 'updated_time_dim_id'],
            measures=['created_at', 'updated_at'])

        self.row['external_user_id'] = external_users_dimension.ensure(self.row)
        external_users_fact_table.insert(self.row)
Esempio n. 7
0
# As all the information needed for the other dimensions are stored in the
# sales database, we can loop through all the rows in it, split the timestamp
# into its three parts, and lookup the three dimension keys needed for
# the fact table while letting pygrametl update each dimension with the
# necessary data using Dimension.ensure(). Using this method instead of
# insert combines a lookup with a insertion so a new row only is inserted
# into the dimension or fact table, if it does not yet exist.
for row in sales_source:

    # The timestamp is split into its three parts
    split_timestamp(row)

    # We update the row with the primary keys of each dimension while at
    # the same time inserting new data into each dimension
    row['bookid'] = book_dimension.ensure(row)
    row['timeid'] = time_dimension.ensure(row)

    # We do not use ensure() for the location dimension, as city
    # and region information of all stores has already been loaded into
    # the table, and having a sale from a store we do not know about is
    # probably either an error in the sales or region data. We use lookup
    # instead which does not insert data and returns None, if no row with
    # the requested data is available, allowing for simple implementation
    # of error handling in ETL flow, which is shown here as an exception
    row['locationid'] = location_dimension.lookup(row)

    # A simple example of how to check if a lookup was successful, so
    # errors can be handled in some way. Here we just give up, and throw
    # an error.
    if not row['locationid']:
Esempio n. 8
0
Fact_WorkAttendance = AccumulatingSnapshotFactTable(
    name='Fact_WorkAttendance',
    keyrefs=['CandidateKey', 'GroupKey', 'DateKey'],
    otherrefs=['Version', 'TimeIn', 'TimeOut'],
    measures=['TotalHoursWorked'])

Fact_CandidateConversation = AccumulatingSnapshotFactTable(
    name='Fact_CandidateConversation',
    keyrefs=['CandidateKey', 'ConversationDateKey', 'ConversationId'],
    otherrefs=['Version', 'ConversationRole'],
    measures=['Rating'])

# Load the dimension tables
for row in DimDate_source:
    row['FullDate'] = convert_FullDate(row)
    DimDate.ensure(row, namemapping={'DateKey': 'DateKey'})

for row in DimGroup_source:
    row['Version'] = CS.VersionNumber
    DimGroup.scdensure(row)

for row in DimGroupCategory_source:
    DimGroupCategory.ensure(row)

for row in DimRole_source:
    DimRole.ensure(row)

for row in DimCandidate_source:
    row['Version'] = CS.VersionNumber
    DimCandidate.scdensure(row)
Esempio n. 9
0
author_dim = Dimension(name="authordim", key="aid", attributes=["name", "city", "cid"])

book_dim = SlowlyChangingDimension(
    name="bookdim", key="bid", attributes=["title", "year", "version"], lookupatts=["title"], versionatt="version"
)

country_dim = Dimension(name="countrydim", key="cid", attributes=["country"], lookupatts=["country"])

fact_table = FactTable(name="facttable", keyrefs=["aid", "bid"])

snowflake = SnowflakedDimension([(author_dim, country_dim)])

# We map cities to countries and populate the countrydim
cid_map = {}
for row in country_src:
    cid = country_dim.ensure(row)
    cid_map[row["city"]] = cid

# We populate the authordim and the fact table
for row in author_src:
    if row["city"] in ["Hadsten", "Skanderborg", "Kobenhavn"]:
        row["cid"] = cid_map[row["city"]]
    else:
        row["cid"] = None
    row["name"] = row["firstname"] + " " + row["lastname"]
    row.pop("aid", 0)  # Gets rid of aid so that pygrametl can generate them

    # Placing new row in author_dim
    row["aid"] = author_dim.ensure(row)

    # Placing new row in fact_table
Esempio n. 10
0
    row['month'] = timestamp_split[1]
    row['day'] = timestamp_split[2]

# We put the regions into our location_dimension
[location_dimension.insert(row) for row in region_source]
csv_file_handle.close()

# Each row in the sales database is iterated through and inserted
for row in sales_source:

    # Each row is passed to the timestamp split function for splitting
    split_timestamp(row)

    # Lookups are performed to find the key in each dimension for the fact
    # and if the data is not there, it is inserted from the sales row
    row['bookid'] = book_dimension.ensure(row)
    row['timeid'] = time_dimension.ensure(row)

    # For the location dimension, all the data is already present, so a
    # missing row must be an error
    row['locationid'] = location_dimension.lookup(row)
    if not row['locationid']:
       raise ValueError("city was not present in the location dimension")

    # The row can then be inserted into the fact table
    fact_table.insert(row)

# The data warehouse connection is then ordered to commit and close
dw_conn_wrapper.commit()
dw_conn_wrapper.close()