Example #1
    def load_cdr(self):
        call_dimension = Dimension(
            name='"DimCall"',
            key='uuid',
            attributes=[
                "SipFromUser", 'SipToUser', 'HangupCause', 'Direction',
                'SwitchName', 'CallerContext', 'IsAnswered', 'Duration',
                'ObjClass', 'ObjType', 'ObjCategory', 'CompanyId', 'TenantId',
                'AppId', 'AgentSkill', 'OriginatedLegs', 'DVPCallDirection',
                'HangupDisposition', 'AgentAnswered', 'IsQueued',
                'SipResource', 'BusinessUnit', 'CampaignId', 'CampaignName',
                'ExtraData'
            ])

        call_fact_table = FactTable(
            name='"FactCall"',
            keyrefs=[
                'uuid', 'CallUuid', 'BridgeUuid', 'CreatedDateDimId',
                'AnsweredDateDimId', 'BridgedDateDimId', 'HangupDateDimId',
                'CreatedTimeDimId', 'AnsweredTimeDimId', 'BridgedTimeDimId',
                'HangupTimeDimId'
            ],
            measures=[
                'CreatedTime', 'AnsweredTime', 'BridgedTime', 'HangupTime',
                'BillSec', 'HoldSec', 'ProgressSec', 'QueueSec', 'AnswerSec',
                'WaitSec', 'ProgressMediaSec', 'FlowBillSec'
            ])

        self.row['uuid'] = call_dimension.ensure(self.row)
        call_fact_table.insert(self.row)
Example #2
    def load_tickets(self):
        ticket_dimension = Dimension(
            name='"DimTicket"',
            key='ticket_id',
            attributes=[
                'active', 'tid', 'is_sub_ticket', 'subject', 'reference',
                'description', 'priority', 'requester', 'submitter',
                'collaborators', 'watchers', 'attachments', 'sub_tickets',
                'related_tickets', 'merged_tickets', 'isolated_tags', 'sla',
                'comments', 'tags', 'ticket_type', 'status_type', 'channel',
                'company', 'tenant', 'assignee', 'assignee_group',
                'security_level', 'event_author'
            ])

        ticket_fact_table = FactTable(
            name='"FactTicket"',
            keyrefs=[
                'engagement_session_id', 'ticket_id', 'created_at_dim_id',
                'updated_at_dim_id', 'due_at_dim_id', 'created_time_dim_id',
                'updated_time_dim_id', 'due_time_dim_id'
            ],
            measures=[
                'created_at', 'updated_at', 'due_at', 'time_estimation',
                're_opens', 'waited_time', 'worked_time', 'resolution_time'
            ])

        # Update the row with the primary key of the ticket dimension, inserting
        # the row into the dimension at the same time if it is not already there.

        self.row['ticket_id'] = ticket_dimension.ensure(self.row)
        ticket_fact_table.insert(self.row)
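
The comment above describes pygrametl's lookup-or-insert pattern: Dimension.ensure() returns the dimension key for the row, inserting the row first if no matching member exists, and the returned key is written back into the row so the fact insert can reference it. A minimal self-contained sketch of the same pattern; the table and column names below are hypothetical, not the ones used in these examples:

import sqlite3
import pygrametl
from pygrametl.tables import Dimension, FactTable

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE dim_ticket (ticket_id INTEGER PRIMARY KEY, subject TEXT)')
conn.execute('CREATE TABLE fact_ticket (ticket_id INTEGER, re_opens INTEGER)')
wrapper = pygrametl.ConnectionWrapper(conn)

dim = Dimension(name='dim_ticket', key='ticket_id', attributes=['subject'])
fact = FactTable(name='fact_ticket', keyrefs=['ticket_id'], measures=['re_opens'])

row = {'subject': 'Printer is on fire', 're_opens': 2}
row['ticket_id'] = dim.ensure(row)  # lookup-or-insert; returns the surrogate key
fact.insert(row)                    # the row now carries every keyref and measure

wrapper.commit()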
Example #3
    def load_engagement(self):
        engagement_dimension = Dimension(
            name='"DimEngagement"',
            key='engagement_id',
            attributes=[
                'profile', 'channel_from', 'channel_to', 'company',
                'has_profile', 'tenant', 'notes', 'channel'
            ])

        engagement_fact_table = FactTable(
            name='"FactEngagement"',
            keyrefs=[
                'engagement_id', 'created_at_dim_id', 'updated_at_dim_id',
                'created_time_dim_id', 'updated_time_dim_id'
            ],
            measures=['updated_at', 'created_at'])

        self.row['eng_dim_id'] = engagement_dimension.ensure(self.row)
        engagement_fact_table.insert(self.row)
        return True
Example #4
    def load_ticket_events(self):
        ticket_event_dimension = Dimension(
            name='"DimTicketEvent"',
            key='ticket_event_id',
            attributes=['event_type', 'event_author', 'event_body'])

        ticket_event_fact_table = FactTable(
            name='"FactTicketEvent"',
            keyrefs=['ticket_id', 'ticket_event_id'],
            measures=['event_created_at_dim_id', 'event_created_at'])

        # Ensure the ticket-event row exists in the dimension (inserting it if
        # needed) before the corresponding fact row is inserted.

        ticket_event_dimension.ensure(self.row)
        ticket_event_fact_table.insert(self.row)
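
Unlike the earlier examples, the return value of ensure() is not captured here, presumably because ticket_event_id already arrives in the row. Either way, FactTable.insert() requires every keyref and measure it was declared with to be present in the row. A small sketch of that requirement, with hypothetical table and column names:

import sqlite3
import pygrametl
from pygrametl.tables import FactTable

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE fact_event (ticket_id INTEGER, ticket_event_id INTEGER, '
             'event_created_at TEXT)')
wrapper = pygrametl.ConnectionWrapper(conn)

fact = FactTable(name='fact_event',
                 keyrefs=['ticket_id', 'ticket_event_id'],
                 measures=['event_created_at'])

row = {'ticket_id': 1, 'ticket_event_id': 42, 'event_created_at': '2024-01-01 10:00:00'}
fact.insert(row)  # every keyref and measure must already be present in the row
fact.ensure(row)  # ensure() only inserts a fact if one with these keyrefs does not exist

wrapper.commit()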
Example #5
    def load_external_users(self):

        external_users_dimension = Dimension(
            name='"DimExternalUsers"',
            key='external_user_id',
            attributes=['title', 'name', 'avatar', 'birthday', 'gender', 'first_name', 'last_name',
                        'locale', 'ssn', 'password',  'phone', 'email', 'landnumber', 'facebook', 'twitter', 'linkedin',
                        'googleplus', 'skype', 'thirdpartyreference', 'company', 'tenant', 'custom_fields', 'tags',
                        'contacts', 'zipcode', 'address_number', 'city', 'province', 'country'])

        external_users_fact_table = FactTable(
            name='"FactExternalUsers"',
            keyrefs=[
                'external_user_id', 'created_at_dim_id', 'updated_at_dim_id',
                'created_time_dim_id', 'updated_time_dim_id'
            ],
            measures=['created_at', 'updated_at'])

        self.row['external_user_id'] = external_users_dimension.ensure(self.row)
        external_users_fact_table.insert(self.row)
Example #6
    def cls_to_pygram_dim(cls, schema_name, lookup_fields=None):
        # Use the class-defined lookup fields unless explicit ones are given.
        if not lookup_fields:
            lookup_fields = cls.cls_get_lookup_fields()
        if lookup_fields:
            dim = CachedDimension(name=schema_name + '.' + cls.cls_get_name(),
                                  key='id',
                                  attributes=cls.cls_get_column_names_no_id(),
                                  lookupatts=lookup_fields,
                                  cachefullrows=True)
        else:
            dim = Dimension(name=schema_name + '.' + cls.cls_get_name(),
                            key='id',
                            attributes=cls.cls_get_column_names_no_id())
        return dim
Example #7
    def load_originated_legs(self):

        self.row2["uuid"] = self.uuid
        self.row2["CallUuid"] = self.call_uuid
        self.row2["MemberUuid"] = self.member_uuid
        self.row2["IsTransferLeg"] = self.is_transfer_leg
        self.row2["Direction"] = self.direction
        self.row2["ObjType"] = self.op_cat
        self.row2["originated_leg"] = self.originatedLegsList

        originated_legs_dimension = Dimension(
            name='"DimOriginatedLegs"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        originated_legs_dimension_temp = Dimension(
            name='"DimOriginatedLegsTemp"',
            key='uuid',
            attributes=['CallUuid', 'Direction', 'MemberUuid', 'originated_leg', 'IsTransferLeg', 'ObjType'])

        self.row2['uuid'] = originated_legs_dimension.ensure(self.row2)
        self.row2['uuid'] = originated_legs_dimension_temp.ensure(self.row2)
Example #8
def convert_FullDate(row):
    # Convert FullDate to datetime
    return datetime.strptime(row['FullDate'], '%d/%m/%Y')


def key_finder(row, namemapping):
    # Find the value to use as the primary key of DimDate (the full date itself)
    return row[namemapping.get('FullDate', 'FullDate')]


# Create dimension and fact table abstractions for use in the ETL flow
DimDate = Dimension(
    name='DimDate',
    key='DateKey',
    attributes=['FullDate', 'DateName', 'DayOfWeek', 'DayNameOfWeek', 'DayOfMonth', 'DayOfYear', 'WeekdayWeekend',
                'WeekOfYear', 'MonthName', 'MonthOfYear', 'IsLastDayOfMonth', 'CalendarQuarter', 'CalendarYear',
                'CalendarYearMonth', 'CalendarYearQtr', 'FiscalMonthOfYear', 'FiscalQuarter', 'FiscalYear',
                'FiscalYearMonth', 'FiscalYearQtr'],
    lookupatts=['FullDate'],
    idfinder=key_finder
)

DimGroup = TypeOneSlowlyChangingDimension(
    name='DimGroup',
    key='GroupKey',
    attributes=['GroupId', 'GroupName', 'Version'],
    lookupatts=['GroupId'])

DimGroupCategory = Dimension(
    name='DimGroupCategory',
    key='GroupCategoryKey',
    attributes=['GroupCategoryId', 'GroupCategoryName'])
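
DimDate supplies its own idfinder, so the key value is computed by key_finder instead of being auto-generated. A minimal self-contained sketch of the idfinder mechanism; the table and attributes below are hypothetical and much smaller than DimDate:

import sqlite3
import pygrametl
from pygrametl.tables import Dimension

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE dim_day (DateKey TEXT PRIMARY KEY, FullDate TEXT, DayName TEXT)')
wrapper = pygrametl.ConnectionWrapper(conn)

# The idfinder is called as idfinder(row, namemapping) whenever a row is inserted
# without a value for the key; here the date string itself becomes the key.
dim_day = Dimension(name='dim_day',
                    key='DateKey',
                    attributes=['FullDate', 'DayName'],
                    lookupatts=['FullDate'],
                    idfinder=lambda row, namemapping: row['FullDate'])

key = dim_day.ensure({'FullDate': '24/12/2024', 'DayName': 'Tuesday'})
print(key)  # prints 24/12/2024

wrapper.commit()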
Example #9
# Connections
dw_conn = sqlite3.connect(dw_path)
author_conn = sqlite3.connect(author_path)
book_conn = sqlite3.connect(book_path)
country_handle = open(country_path, "r")

wrapper = pygrametl.ConnectionWrapper(dw_conn)

# Sources
author_src = SQLSource(connection=author_conn, query="SELECT * FROM author")
book_src = SQLSource(connection=book_conn, query="SELECT * FROM book")
country_src = CSVSource(f=country_handle, delimiter=",")


# Tables
author_dim = Dimension(name="authordim", key="aid", attributes=["name", "city", "cid"])

book_dim = SlowlyChangingDimension(
    name="bookdim", key="bid", attributes=["title", "year", "version"], lookupatts=["title"], versionatt="version"
)

country_dim = Dimension(name="countrydim", key="cid", attributes=["country"], lookupatts=["country"])

fact_table = FactTable(name="facttable", keyrefs=["aid", "bid"])

snowflake = SnowflakedDimension([(author_dim, country_dim)])

# We map cities to countries and populate the countrydim
cid_map = {}
for row in country_src:
    cid = country_dim.ensure(row)
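    # Hypothetical continuation (the original snippet is cut off here): assuming the
    # country CSV provides 'city' and 'country' columns, each city is mapped to the
    # key of its country row.
    cid_map[row['city']] = cid

# The map can then attach the country key to each author row before the author
# dimension (the root of the snowflake) is ensured.
for row in author_src:
    row['cid'] = cid_map.get(row['city'])
    author_dim.ensure(row)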
Example #10
""" A sample pygrametl program
"""

__author__ = 'Mathias Claus Jensen'

import pygrametl
from pygrametl.datasources import SQLSource
from pygrametl.tables import Dimension, FactTable
import sqlite3

input_conn = sqlite3.connect('input.db')
output_conn = sqlite3.connect('output.db')

input_src = SQLSource(input_conn, query='SELECT * FROM dim1')
output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

dim1 = Dimension(name='dim1', key='key1', attributes=['attr1', 'attr2'])

dim2 = Dimension(name='dim2', key='key2', attributes=['attr3', 'attr4'])

ft1 = FactTable(name='ft1', keyrefs=[
    'key1',
])

input_conn.close()
output_conn.close()
Example #11
    SALARY         REAL);''')

company_info = [('Anders', 43, 'Denmark', 21000.00),
                ('Charles', 50, 'Texas', 25000.00),
                ('Wolf', 28, 'Sweden', 19000.00),
                ('Hannibal', 45, 'America', 65000.00),
                ('Hannibal', 45, 'America', 65000.00),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000),
                ('Buggy', 67, 'America', 2000), ('Buggy', 67, 'America', 2000)]

# ... and inserting the necessary data.
c.executemany("INSERT INTO COMPANY (NAME,AGE,ADDRESS,SALARY) VALUES (?,?,?,?)",
              company_info)
conn.commit()
columns = ['NAME', 'AGE', 'ADDRESS', 'SALARY']

wrapper = ConnectionWrapper(conn)
dim = Dimension(name='COMPANY',
                key='ID',
                attributes=['NAME', 'AGE', 'ADDRESS', 'SALARY'],
                lookupatts=['NAME'])

dim_rep = DimRepresentation(dim, conn)
dw = DWRepresentation([dim_rep], conn)

dup_predicate = DuplicatePredicate('company', ['ID'], True)
print(dup_predicate.run(dw))

conn.close()
Example #12
}, {
    'attr1': 25
}, {
    'attr1': None
}, {
    'attr1': 5
}, {
    'attr1': None
}, {
    'attr1': None
}]

wrapper = pygrametl.ConnectionWrapper(connection=null_conn)

dim1 = Dimension(
    name='dim1',
    key='key1',
    attributes=['attr1'],
)

for row in data:
    dim1.insert(row)

dim_rep = DimRepresentation(dim1, null_conn)
notnull_tester = ColumnNotNullPredicate('dim1')
null_rep = DWRepresentation([dim_rep], null_conn)

print(notnull_tester.run(null_rep))

null_conn.close()
Example #13
def setup(dbname, number):

    # Create (or truncate) the database file before connecting to it
    open(os.path.expanduser(dbname), 'w').close()

    output_conn = sqlite3.connect(dbname)
    output_cur = output_conn.cursor()

    output_cur.execute("CREATE TABLE dim1 "
                       "(key1 INTEGER PRIMARY KEY, attr1 INTEGER, "
                       "attr2 INTEGER)")

    output_cur.execute("CREATE TABLE dim2 "
                       "(key2 INTEGER PRIMARY KEY, attr3 INTEGER, "
                       "attr4 INTEGER)")

    output_cur.execute("CREATE TABLE ft1 "
                       "(measure INTEGER, key1 INTEGER, key2 INTEGER, "
                       "PRIMARY KEY(key1, key2))")

    output_conn.commit()
    output_wrapper = pygrametl.ConnectionWrapper(connection=output_conn)

    dim1 = Dimension(
        name='dim1',
        key='key1',
        attributes=['attr1', 'attr2']
    )

    dim2 = Dimension(
        name='dim2',
        key='key2',
        attributes=['attr3', 'attr4'],
    )

    ft1 = FactTable(
        name='ft1',
        keyrefs=['key1', 'key2'],
        measures=['measure']
    )

    facts = []
    dim1_data = []
    dim2_data = []

    total_elapsed = 0.
    print('Generating ft1 data')
    start = time.monotonic()
    counter = 0
    for i in range(1, number + 1):
        for j in range(1, number + 1):
            counter += 10
            facts.append({'key1': i, 'key2': j, 'measure': counter})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim1 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim1_data.append({'attr1': i, 'attr2': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Generating dim2 data')
    start = time.monotonic()
    for i in range(1, number + 1):
        dim2_data.append({'attr3': i, 'attr4': number + 1 - i})
    end = time.monotonic()
    elapsed = end - start
    print('Generated: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into ft1')
    start = time.monotonic()
    for row in facts:
        ft1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim1')
    start = time.monotonic()
    for row in dim1_data:
        dim1.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('Inserting data into dim2')
    start = time.monotonic()
    for row in dim2_data:
        dim2.insert(row)
    end = time.monotonic()
    elapsed = end - start
    print('Inserted: {}{}'.format(round(elapsed, 3), 's'))
    total_elapsed += elapsed

    print('DW populated')
    print('Total time: {}{}\n'.format(round(total_elapsed, 3), 's'))
    output_conn.commit()
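
A hypothetical invocation of the benchmark setup above; the file name and the row count are arbitrary:

if __name__ == '__main__':
    # Builds the schema, then times the generation and insertion of number**2 facts
    # and `number` rows per dimension.
    setup('benchmark.db', 100)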
Example #14
region_source = CSVSource(f=csv_file_handle, delimiter=',')

# We create an object for each dimension in the DW and for the FactTable
book_dimension = Dimension(
    name='bookDim',
    key='bookid',
    attributes=['book', 'genre'])

time_dimension = Dimension(
    name='timeDim',
    key='timeid',
    attributes=['day', 'month', 'year'])

location_dimension = Dimension(
    name='locationDim',
    key='locationid',
    attributes=['city', 'region'],
    lookupatts=['city'])

fact_table = FactTable(
    name='factTable',
    keyrefs=['bookid', 'locationid', 'timeid'],
    measures=['sale'])


# NOTE: Most of the following code is taken directly from pygrametl.org and has little to do
# with porting this example to SQLite; it is, however, where the actual ETL work happens.
# A Python function is needed to split the timestamp into its three parts (see the sketch
# after this example).

# We put the regions into our location_dimension
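
The comments above refer to a timestamp-splitting function. A sketch of such a helper in the style of the pygrametl beginner's guide; the 'timestamp' column name and its 'year/month/day' format are assumptions about the sales source:

def split_timestamp(row):
    """Split the timestamp of a row into its year, month, and day parts."""
    timestamp = row['timestamp']  # assumed column name in the sales rows
    row['year'], row['month'], row['day'] = timestamp.split('/')

# Filling the location dimension from the region CSV is then a single pass:
[location_dimension.insert(row) for row in region_source]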
Example #15
sales_conn = sqlite3.connect(SALES_DB_NAME)
dw_conn = sqlite3.connect(DW_NAME)

# Wrap the DW connection so that pygrametl manages it from here on
dw_conn_wrapper = pygrametl.ConnectionWrapper(connection=dw_conn)

# We create a source for the sales database and one for the region CSV file.
# NOTE: NAME_MAPPING renames the columns returned by the query so that they match the
# attribute names used by the dimensions and the fact table (see the sketch after this example).
sales_source = SQLSource(connection=sales_conn, query="SELECT * FROM sales", names=NAME_MAPPING)

csv_file_handle = open(CSV_NAME, "r")
region_source = CSVSource(f=csv_file_handle, delimiter=',')

# We create an object for each dimension in the DW and for the FactTable
book_dimension = Dimension(
    name='bookDim',
    key='bookid',
    attributes=['book', 'genre'])

time_dimension = Dimension(
    name='timeDim',
    key='timeid',
    attributes=['day', 'month', 'year'])

location_dimension = Dimension(
    name='locationDim',
    key='locationid',
    attributes=['city', 'region'],
    lookupatts=['city'])

fact_table = FactTable(
    name='factTable',
    keyrefs=['bookid', 'locationid', 'timeid'],
    measures=['sale'])
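
NAME_MAPPING itself is not shown in this snippet. With SQLSource, the names argument simply relabels the columns of the query result, so NAME_MAPPING is presumably a sequence of attribute names matching the tables above; a hypothetical definition:

# Hypothetical definition: expose the sales columns under the attribute names
# used by the dimensions and the fact table.
NAME_MAPPING = ('book', 'genre', 'city', 'timestamp', 'sale')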
Example #16
    {'attr1': None},
    {'attr1': 15},
    {'attr1': None},
    {'attr1': None},
    {'attr1': 1},
    {'attr1': 25},
    {'attr1': None},
    {'attr1': 5},
    {'attr1': None},
    {'attr1': None}
]

wrapper = pygrametl.ConnectionWrapper(connection=null_conn)

dim1 = Dimension(
    name='dim1',
    key='key1',
    attributes=['attr1'],
)

for row in data:
    dim1.insert(row)

dim_rep = DimRepresentation(dim1, null_conn)
notnull_tester = ColumnNotNullPredicate('dim1')
null_rep = DWRepresentation([dim_rep], null_conn)

print(notnull_tester.run(null_rep))

null_conn.close()
Example #17
# handle to the file, as pygrametl uses Python's DictReader for CSV files,
# and the header of the CSV file contains information about each column.
region_file_handle = open('c:\\work\\python\\region.csv', 'r', 16384)
region_source = CSVSource(f=region_file_handle, delimiter=',')

# An instance of Dimension is created for each dimension in the data
# warehouse. For each table, the name of the table, the primary key of
# the table, and a list of non key attributes in the table, are added.
# In addition, for the location dimension we specify which attributes
# should be used for a lookup of the primary key, as only the city is
# present in the sales database and is enough to perform a lookup of
# a unique primary key. As mentioned in the beginning of the guide, using
# named parameters is strongly encouraged.

book_dimension = Dimension(
    name='book',
    key='bookid',
    attributes=['book', 'genre'])

time_dimension = Dimension(
    name='time',
    key='timeid',
    attributes=['day', 'month', 'year'])

location_dimension = Dimension(
    name='location',
    key='locationid',
    attributes=['city', 'region'],
    lookupatts=['city'])

# A single instance of FactTable is created for the data warehouse's
# fact table, with the name of the table, a list of attributes constituting
# the table's primary key, and the measures (see the sketch below).
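
The snippet ends before the FactTable is shown; based on the dimensions defined above and the beginner's guide this example follows, it would look roughly like this (the 'sale' measure is an assumption):

fact_table = FactTable(
    name='facttable',
    keyrefs=['bookid', 'locationid', 'timeid'],
    measures=['sale'])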
Example #18
# Making table to test on...
c.execute('''CREATE TABLE FACTTABLE
    (Book           TEXT   NOT NULL,
    Issue            INT    )''')

company_info = [('The Hobbit', 1), ('The Bobbit', 5)]

# ... and inserting the necessary data.
c.executemany("INSERT INTO FACTTABLE (BOOK,ISSUE) VALUES (?,?)", company_info)

conn.commit()

ConnectionWrapper(conn)

dim = Dimension('COMPANY', 'ID', ['NAME', 'AGE', 'ADDRESS', 'SALARY'], ['NAME'])
dim_rep = DimRepresentation(dim, conn)

dim2 = Dimension('LASTNAME', 'NAME', ['LAST'])
dim_rep2 = DimRepresentation(dim2, conn)

ft = FactTable('FACTTABLE', ['Book'], ['Issue'])
ft_rep = FTRepresentation(ft, conn)

dw = DWRepresentation([dim_rep, ft_rep, dim_rep2], conn)

expected_list1 = [
    {'NAME': 'Anders', 'AGE': 43, 'SALARY': 21000.0, 'ADDRESS': 'Denmark',
     'ID': 1},
    {'NAME': 'Charles', 'AGE': 50, 'SALARY': 25000.0, 'ADDRESS': 'Texas',
     'ID': 2},
Example #19
}, {
    'attr1': 75,
    'attr2': 75,
    'attr3': 75,
    'attr4': 75
}, {
    'attr1': 76,
    'attr2': 76,
    'attr3': 76,
    'attr4': 76
}]

wrapper = pygrametl.ConnectionWrapper(connection=conn)

dim1 = Dimension(name='dim1',
                 key='key1',
                 attributes=['attr1', 'key2', 'key3'],
                 lookupatts=['attr1'])

dim2 = Dimension(name='dim2',
                 key='key2',
                 attributes=['attr2', 'key4'],
                 lookupatts=['attr2'])

dim3 = Dimension(name='dim3', key='key3', attributes=['attr3'])

dim4 = Dimension(name='dim4', key='key4', attributes=['attr4'])

special_snowflake = SnowflakedDimension(
    references=[(dim1, [dim2, dim3]), (dim2, dim4)])
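
With the references declared above, a single ensure() call on the snowflake ensures the referenced dimensions before the root and returns the root's key. A hedged usage sketch; the attribute values are arbitrary:

row = {'attr1': 77, 'attr2': 77, 'attr3': 77, 'attr4': 77}
key1 = special_snowflake.ensure(row)
# The keys for dim2, dim3, and dim4 are resolved along the way, so dim1 can be
# filled with its foreign key references.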
Example #20
    def __create_tables(self):

        # Systems
        self.system_dimension = CachedDimension(name='system',
                                                key='system_id',
                                                attributes=['system_name'],
                                                lookupatts=['system_name'])

        # Stations

        self.start_station_dimension = CachedDimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander)

        self.end_station_dimension = CachedDimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander)

        # Trip dates and times

        self.date_dimension = CachedDimension(
            name='bdate',
            key='date_id',
            attributes=['year', 'month', 'day', 'day_of_week', 'date_string'],
            lookupatts=['date_string'],
            rowexpander=date_row_expander)

        self.time_dimension = CachedDimension(
            name='btime',
            key='time_id',
            attributes=['hour', 'minute', 'time_string', 'time_of_day'],
            lookupatts=['time_string'],
            rowexpander=time_row_expander)

        # Trips

        self.trip_fact_table = FactTable(
            name='trips',
            measures=['duration_s'],
            keyrefs=[
                'system_id', 'start_station_id', 'end_station_id', 'date_id',
                'time_id', 'customer_birthyear_id', 'customer_gender_id',
                'customer_type_id', 'bike_id', 'trip_category_id'
            ])

        # Weather fact table (keyed by system and the date dimension above)

        self.weather_fact_table = FactTable(name='weather',
                                            measures=[
                                                'precipitation_in', 'snow_in',
                                                'temp_avg_f', 'temp_min_f',
                                                'temp_max_f', 'wind_mph'
                                            ],
                                            keyrefs=['system_id', 'date_id'])

        self.trip_category = CachedDimension(name='trip_category',
                                             key='trip_category_id',
                                             attributes=['trip_category'])

        self.bike_dimension = CachedDimension(
            name='bikes',
            key='bike_id',
            attributes=['system_id', 'bike_name'],
            lookupatts=['system_id', 'bike_name'],
            defaultidvalue=-1)

        self.customer_gender_dimension = CachedDimension(
            name='customer_gender',
            key='customer_gender_id',
            attributes=['customer_gender'],
            lookupatts=['customer_gender'])

        self.customer_birthyear_dimension = CachedDimension(
            name='customer_birthyear',
            key='customer_birthyear_id',
            attributes=['customer_birthyear'],
            lookupatts=['customer_birthyear'])

        self.customer_type_dimension = CachedDimension(
            name='customer_type',
            key='customer_type_id',
            attributes=['customer_type'],
            lookupatts=['customer_type'])

        # Station status
        self.station_status_fact_table = FactTable(
            name='station_status',
            keyrefs=['system_id', 'start_station_id', 'date_id', 'time_id'],
            measures=['bikes_available', 'docks_available'])

        # Non-cached version of stations for use only with updating Indego stations.

        self.start_station_noncached_dimension = Dimension(
            name='start_station',
            key='start_station_id',
            attributes=[
                'system_id', 'start_station_short_name', 'start_station_name',
                'start_station_latitude', 'start_station_longitude',
                'start_station_capacity'
            ],
            lookupatts=['system_id', 'start_station_short_name'],
            rowexpander=start_station_missing_data_expander,
            defaultidvalue=-1)

        self.end_station_noncached_dimension = Dimension(
            name='end_station',
            key='end_station_id',
            attributes=[
                'system_id', 'end_station_short_name', 'end_station_name',
                'end_station_latitude', 'end_station_longitude',
                'end_station_capacity'
            ],
            lookupatts=['system_id', 'end_station_short_name'],
            rowexpander=end_station_missing_data_expander,
            defaultidvalue=-1)
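
Several of the dimensions above pass a rowexpander, which pygrametl calls as rowexpander(row, namemapping) when ensure() fails to find the row, giving the code a chance to fill in missing attributes before insertion. A hedged sketch of what one of the referenced expanders might look like; its real body is not shown in this snippet and the fallback values are assumptions:

def start_station_missing_data_expander(row, namemapping):
    # Hypothetical sketch: called only when the lookup on
    # ('system_id', 'start_station_short_name') fails, i.e. before a brand-new
    # station row is inserted. Fill in attributes the feed may not provide.
    row.setdefault('start_station_name', row['start_station_short_name'])
    row.setdefault('start_station_latitude', None)
    row.setdefault('start_station_longitude', None)
    row.setdefault('start_station_capacity', None)
    return row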