Example #1
0
    def test_job_info_factory_create_from_db_existing(self):
        '''test when there are existing entries in the dw_prcsng_cycle table'''
        with dwetl.test_database_session() as session:
            table_base_class = dwetl.Base.classes.dw_prcsng_cycle
            prcsng_cycle_id = self.get_max_prcsng_cycle_id(
                session, table_base_class)

            job_info = JobInfoFactory.create_job_info_from_db(
                session, table_base_class)
            self.assertEqual(prcsng_cycle_id + 1, job_info.prcsng_cycle_id)

            expected_version = dwetl.version
            self.assertEqual(expected_version, job_info.job_version_no)

            self.assertEqual(1, job_info.job_exectn_id)

            job_info = JobInfoFactory.create_job_info_from_db(
                session, table_base_class)
            self.assertEqual(prcsng_cycle_id + 2, job_info.prcsng_cycle_id)

            expected_version = dwetl.version
            self.assertEqual(expected_version, job_info.job_version_no)
Example #2
0
def run(input_directory):

    #create logger
    today = datetime.datetime.now().strftime('%Y%m%d')
    logger = logging.getLogger('dwetl')
    file_handler = logging.FileHandler(f'logs/dwetl.log.{today}')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.setLevel(logging.INFO)

    time_started = datetime.datetime.now()
    logger.info(f'DWETL.py started')
    '''
    create job_info for current process
    '''
    with dwetl.database_session() as session:
        job_info_table_class = dwetl.Base.classes['dw_prcsng_cycle']
        job_info = JobInfoFactory.create_job_info_from_db(
            session, job_info_table_class)
    '''
    load_stage_1
    '''
    load_stage_1.load_stage_1(job_info, input_directory, logger)
    '''
    load_stage_2
    load 'in_' values from stg1 to stg 2 tables
    load 'in_' values
    '''
    load_stage_2.load_stage_2(job_info, logger)
    '''
    stg 2 intertable processing
    '''
    stage_2_intertable_processing.stage_2_intertable_processing(
        job_info, logger)
    '''
    end of job metadata writing
    '''

    endtime = datetime.datetime.now()
    # write end time to processing cycle table
    with dwetl.database_session() as session:
        job_info_table_class = dwetl.Base.classes['dw_prcsng_cycle']
        # get row for current id and write end time to it
        max_prcsng_id = session.query(job_info_table_class).\
            filter(job_info_table_class.dw_prcsng_cycle_id == job_info.prcsng_cycle_id).\
            update({'dw_prcsng_cycle_exectn_end_tmstmp': endtime})

    elapsed_time = endtime - time_started
    print("elapsed time: ", str(elapsed_time))
Example #3
0
    def test_job_info_factory_create_from_db(self):
        # expected user
        expected_user = getpass.getuser()

        with dwetl.test_database_session() as session:
            table_base_class = dwetl.Base.classes.dw_prcsng_cycle
            prcsng_cycle_id = self.get_max_prcsng_cycle_id(
                session, table_base_class)
            next_pcid = prcsng_cycle_id + 1
            job_info = JobInfoFactory.create_job_info_from_db(
                session, table_base_class)
            self.assertEqual(next_pcid, job_info.prcsng_cycle_id)
            self.assertEqual(1, job_info.job_exectn_id)
            self.assertEqual(dwetl.version, job_info.job_version_no)
            self.assertEqual(expected_user, job_info.user_id)

            added_record = session.query(table_base_class).filter(
                table_base_class.dw_prcsng_cycle_id ==
                next_pcid).one().__dict__
            self.assertEqual(next_pcid, added_record['dw_prcsng_cycle_id'])
            self.assertEqual(expected_user, added_record['em_create_user_id'])
Example #4
0
    #         #writer = PrintWriter()
    #         writer = SqlAlchemyWriter(session, dwetl.Base.classes[table])
    #         logger = None
    #         processor = LoadMpfTsv(reader, writer, job_info, logger)
    #
    #         processor.execute()

'''
main function for running script from the command line
'''
if __name__=='__main__':
    arguments = sys.argv

    if len(arguments) < 2 or len(arguments) > 3:
        print('Usage: ')
        print('\tload_stage_1.py [prcsng_cycle_id] [data_directory] ')
        sys.exit(1)

    prcsng_cycle_id = arguments[1]
    input_directory = os.path.dirname(os.path.realpath(__file__))
    today = datetime.datetime.now().strftime('%Y%m%d')
    # if 2nd argument isn't provided use today as data directory
    data_directory = os.path.join(input_directory,'data', today)

    # data directory can be specified as 2nd argument
    if len(arguments) == 3:
        data_directory = arguments[2]

    job_info = JobInfoFactory.create_from_prcsng_cycle_id(prcsng_cycle_id)
    load_stage_1(job_info, data_directory)
Example #5
0
 def test_job_info_factory_create_from_prcsng_cycle_id(self):
     job_info = JobInfoFactory.create_from_prcsng_cycle_id(1)
     self.assertEqual(1, job_info.prcsng_cycle_id)
 def setUp(self):
     self.writer = ListWriter()
     self.job_info = JobInfoFactory.create_from_prcsng_cycle_id(-1)
     self.logger = None
Example #7
0
def run(input_file):
    #create logger
    today = datetime.datetime.now().strftime('%Y%m%d')
    logger = logging.getLogger('dwetl')
    file_handler = logging.FileHandler(f'logs/ezproxy.log.{today}')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.setLevel(logging.INFO)

    time_started = datetime.datetime.now()
    logger.info(f'EzProxy ETL started')
    '''
    check current hostname environment configuration to prevent errors
    '''
    hostname = socket.gethostname()
    configured_host = database_credentials.configured_host()

    if hostname != configured_host:
        # quit program if env file hostname doesn't match with the current hostname
        print(
            'ERROR: EzProxy ETL ended because .env contained an error. Please double check the configured host and db configuration.'
        )
        logger.error(
            f'EzProxy ETL ended because .env contained an error. please double check the configured host and db configuration.'
        )
        sys.exit()
    '''
    create job_info for current process
    '''
    with dwetl.database_session() as session:
        job_info_table_class = dwetl.Base.classes['dw_prcsng_cycle']
        job_info = JobInfoFactory.create_job_info_from_db(
            session, job_info_table_class)
    '''
    load ezproxy stage 1 
    '''
    ezproxy_load.load_stage_1(job_info, input_file, logger)
    '''
    load ezproxy stage 2 
    '''
    ezproxy_load.load_stage_2(job_info, logger)
    '''
    stg 2 intertable processing
    '''
    ezproxy_load.intertable_processing(job_info, logger)
    '''
    fact table load
    '''
    ezproxy_load.load_fact_table(job_info, logger)
    '''
    copy new ezproxy data to reporting database 
    '''
    ezproxy_load.copy_new_facts_to_reporting_db(job_info, logger)
    '''
    end of job metadata writing
    '''

    endtime = datetime.datetime.now()
    # write end time to processing cycle table
    with dwetl.database_session() as session:
        job_info_table_class = dwetl.Base.classes['dw_prcsng_cycle']
        # get row for current id and write end time to it
        max_prcsng_id = session.query(job_info_table_class).\
            filter(job_info_table_class.dw_prcsng_cycle_id == job_info.prcsng_cycle_id).\
            update({'dw_prcsng_cycle_exectn_end_tmstmp': endtime})

    elapsed_time = endtime - time_started
    print("Ezproxy ETL elapsed time: ", str(elapsed_time))
    logger.info(f'EzProxy ETL elapsed time: {str(elapsed_time)}')