def load_fact_table(job_info, logger): print('EZProxy loading fact table...') logger.info('Loading to the fact table.... ') stage2_table = dwetl.Base.classes['dw_stg_2_ezp_sessns_snap'] fact_table = dwetl.Base.classes['fact_ezp_sessns_snap'] processing_cycle_id = job_info.prcsng_cycle_id # get max value for fact key from the reporting db with dwetl.reporting_database_session() as session2: reporting_fact_table = dwetl.ReportingBase.classes[ 'fact_ezp_sessns_snap'] max_ezp_sessns_snap_fact_key = session2.query( func.max(reporting_fact_table.ezp_sessns_snap_fact_key)).scalar() if max_ezp_sessns_snap_fact_key is None: max_ezp_sessns_snap_fact_key = 1 # load etl ezp fact table with dwetl.database_session() as session: reader = SqlAlchemyReader(session, stage2_table, 'em_create_dw_prcsng_cycle_id', processing_cycle_id) writer = SqlAlchemyWriter(session, fact_table) processor = EzproxyFactProcessor(reader, writer, job_info, logger, max_ezp_sessns_snap_fact_key) processor.execute() logger.info('Finished loading to the fact table.... ')
def copy_new_facts_to_reporting_db(job_info, logger): etl_fact_table = dwetl.Base.classes['fact_ezp_sessns_snap'] processing_cycle_id = job_info.prcsng_cycle_id # query and select records from etl fact table # should we use the create update processing cycle ID? or the Update processing cycle id? with dwetl.database_session() as session: reader = SqlAlchemyReader(session, etl_fact_table, 'em_create_dw_prcsng_cycle_id', processing_cycle_id) session.expunge_all() # insert records into reporting db ezp fact table with dwetl.reporting_database_session() as session2: reporting_fact_table = dwetl.ReportingBase.classes[ 'fact_ezp_sessns_snap'] writer = SqlAlchemyWriter(session2, reporting_fact_table) processor = EzproxyReportingFactProcessor(reader, writer, job_info, logger) processor.execute()
def clndr_dt_dim_lookup(item): """ using the ezp_sessns_snap_tmstmp, get the calendar date, and look up clndr_dt_dim_key in dim_date """ timestamp = item['in_ezp_sessns_snap_tmstmp'] date = datetime.strptime(timestamp, '%Y%m%d-%H%M') # need a string YYYY-MM-DD datestring = date.strftime('%Y-%m-%d') with dwetl.reporting_database_session() as session2: dim_date = dwetl.ReportingBase.classes.dim_date # look up the calendar date matching_row = session2.query(dim_date).\ filter(dim_date.clndr_dt==datestring).\ filter(date >= func.date(dim_date.rm_rec_effective_from_dt)).\ filter(date < func.date(dim_date.rm_rec_effective_to_dt)).first() clndr_dt_dim_key = matching_row.clndr_dt_dim_key return clndr_dt_dim_key
def library_dim_lookup(item): """ using the mbr_lbry_cd, find the mbr_lbry_dim_key and put into t1_mbr_lbry_cd__ezp_sessns_snap_mbr_lbry_dim_key """ library_code = item['in_mbr_lbry_cd'].upper() timestamp = item['in_ezp_sessns_snap_tmstmp'] date = datetime.strptime(timestamp, '%Y%m%d-%H%M') with dwetl.reporting_database_session() as session2: MemberLibrary = dwetl.ReportingBase.classes.dim_mbr_lbry # look up the mbr_lbry_dim_key matching_row = session2.query(MemberLibrary).\ filter(MemberLibrary.mbr_lbry_cd==library_code).\ filter(date >= func.date(MemberLibrary.rm_rec_effective_from_dt)).\ filter(date < func.date(MemberLibrary.rm_rec_effective_to_dt)).first() mbr_lbry_dim_key = matching_row.mbr_lbry_dim_key return mbr_lbry_dim_key
def time_of_day_dim_key_lookup(item): """ using ezp_sessns_snap_tmstmp get ezp_sessns_snap_time_of_day_dim_key """ timestamp = item['in_ezp_sessns_snap_tmstmp'] date = datetime.strptime(timestamp, '%Y%m%d-%H%M') # need a string HH:MM:SS timestring = date.strftime('%H:%M:%S') with dwetl.reporting_database_session() as session2: dim_time_of_day = dwetl.ReportingBase.classes.dim_time_of_day #look up the time of day matching_row = session2.query(dim_time_of_day).\ filter(dim_time_of_day.time_of_day==timestring).\ filter(date >= func.date(dim_time_of_day.rm_rec_effective_from_dt)).\ filter(date < func.date(dim_time_of_day.rm_rec_effective_to_dt)).first() time_dim_key = matching_row.time_of_day_dim_key return time_dim_key