예제 #1
0
    def test_load_aleph_tsv(self):
        """Check that LoadAlephTsv writes one row per input row, keeps the
        trigger keys in order, and adds the em_create_* keys."""
        first_key = '000007520'
        second_key = '000147967'
        rows = [
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': first_key,
            },
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': second_key,
            },
        ]

        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # The step is run without a logger in this test.
        LoadAlephTsv(source, sink, info, None).execute()
        output = sink.list

        self.assertEqual(len(rows), len(output))
        self.assertEqual(first_key, output[0]['rec_trigger_key'])
        self.assertEqual(second_key, output[1]['rec_trigger_key'])

        # Input columns plus the job-tracking columns expected on each row.
        wanted = [
            'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted.sort()
        self.assertEqual(wanted, sorted(output[0].keys()))
예제 #2
0
    def test_load_mpf_tsv(self):
        """Check that LoadMpfTsv writes one row per input row, adds the
        em_create_* keys, and sets usmai_mbr_lbry_cd to 'BC' for the
        'BC-BC' library entity."""
        record = {
            'collection_cd': 'MSTCK',
            'lbry_entity_cd': 'BC-BC',
            'collection_name': 'Media Stacks / Request to Pick-up at Home Library',
            'db_operation_cd': 'U',
            'lbry_staff_lms_user_id': 'hhanson',
            'db_operation_effective_date': '2019-08-19',
        }
        rows = [record]

        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # The step is run without a logger in this test.
        LoadMpfTsv(source, sink, info, None).execute()
        output = sink.list

        self.assertEqual(len(rows), len(output))

        wanted = [
            'collection_cd', 'collection_name', 'lbry_entity_cd',
            'db_operation_cd', 'usmai_mbr_lbry_cd',
            'lbry_staff_lms_user_id', 'db_operation_effective_date',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted.sort()
        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))
        self.assertEqual("BC", first['usmai_mbr_lbry_cd'])
예제 #3
0
    def test_load_z00_field_tsv(self):
        """Check that LoadZ00FieldTsv writes one row per input row with the
        expected key set and a sequence number of 1 for a single field."""
        rows = [
            {
                'z00_doc_number': '000025252',
                'z00_marc_rec_field_cd': 'FMT',
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': 'BK',
            },
        ]

        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # The step is run without a logger in this test.
        LoadZ00FieldTsv(source, sink, info, None).execute()
        output = sink.list

        self.assertEqual(len(rows), len(output))

        # The 'UNUSED' input column is not among the expected output keys.
        wanted = [
            'rec_type_cd', 'db_operation_cd', 'rec_trigger_key',
            'z00_doc_number', 'dw_stg_1_marc_rec_field_seq_no',
            'z00_marc_rec_field_cd', 'z00_marc_rec_field_txt',
            'em_create_dw_prcsng_cycle_id', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
        ]
        wanted.sort()
        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))
        self.assertEqual(1, first['dw_stg_1_marc_rec_field_seq_no'])
예제 #4
0
    def test_copy_mai60_z00_field_stage1_to_stage2(self):
        """Check that CopyStage1ToStage2 for 'mai60' writes one row per
        z00_field input row, with 'in_'-prefixed source columns, the library
        name, and the processor's own job name."""
        # Sample from dw_stg_1_mai60_z00_field
        stage1_row = {
            '_sa_instance_state': '',
            'db_operation_cd': 'I',
            'em_create_dw_job_exectn_id': 1,
            'em_create_dw_job_name': 'CreateFileEquivalentTable',
            'em_create_dw_job_version_no': '0.0',
            'em_create_dw_prcsng_cycle_id': self.job_info.prcsng_cycle_id,
            'em_create_tmstmp': 'datetime.datetime(2019, 9, 17, 8, 53, 56, 619908)',
            'em_create_user_id': 'test_user',
            'rec_trigger_key': '000153121',
            'rec_type_cd': 'D',
            'z00_doc_number': '000153121',
            'dw_stg_1_marc_rec_field_seq_no': '1',
            'z00_marc_rec_field_cd': 'FMT',
            'z00_marc_rec_field_txt': 'HO',
        }
        rows = [stage1_row]

        library = 'mai60'
        source = ListReader(rows)
        processor = CopyStage1ToStage2.create(
            source, self.writer, self.job_info, self.logger, library)
        processor.execute()
        output = self.writer.list

        self.assertEqual(len(rows), len(output))

        wanted = sorted([
            'db_operation_cd',
            'dw_stg_2_aleph_lbry_name',
            'in_z00_doc_number',
            'in_dw_stg_1_marc_rec_field_seq_no',
            'in_z00_marc_rec_field_cd',
            'in_z00_marc_rec_field_txt',
            'em_create_dw_prcsng_cycle_id',
            'em_create_dw_job_exectn_id',
            'em_create_dw_job_name',
            'em_create_dw_job_version_no',
            'em_create_user_id',
            'em_create_tmstmp',
        ])
        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))

        self.assertEqual('I', first['db_operation_cd'])
        self.assertEqual('mai60', first['dw_stg_2_aleph_lbry_name'])
        self.assertEqual('000153121', first['in_z00_doc_number'])
        self.assertEqual('1', first['in_dw_stg_1_marc_rec_field_seq_no'])
        self.assertEqual('FMT', first['in_z00_marc_rec_field_cd'])
        self.assertEqual('HO', first['in_z00_marc_rec_field_txt'])
        # The job name on the output is the processor's, not the input row's.
        self.assertEqual(processor.job_name(), first['em_create_dw_job_name'])
예제 #5
0
    def test_process_item(self):
        """Check the key set of the first row written by EzproxyFactProcessor
        for the class-level sample data."""
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        source = ListReader(self.sample_data)
        processor = EzproxyFactProcessor(
            source, sink, info, self.logger,
            self.max_ezp_sessns_snap_fact_key)
        processor.execute()
        output = processor.writer.list

        wanted = [
            'em_create_dw_prcsng_cycle_id', 'in_ezp_sessns_snap_tmstmp',
            'in_mbr_lbry_cd', 'em_create_dw_job_exectn_id',
            'em_create_dw_job_name', 'em_create_dw_job_version_no',
            'em_create_user_id', 'em_create_tmstmp',
            'ezp_sessns_snap_fact_key',
        ]
        wanted.sort()

        self.assertEqual(wanted, sorted(output[0].keys()))
예제 #6
0
    def test_mai50_z35_event_type_not_in_list(self):
        """Feed two z35 rows (event types '01' and '82') through the 'mai50'
        copy processor and expect only the '82' row to be written."""
        rows = [
            {'z35_event_type': '01', 'z35_rec_key': '003891145'},
            {'z35_event_type': '82', 'z35_rec_key': '004893642'},
        ]
        library = 'mai50'

        source = ListReader(rows)
        processor = CopyStage1ToStage2.create(
            source, self.writer, self.job_info, self.logger, library)
        processor.execute()
        output = self.writer.list

        self.assertEqual(1, len(output))
        self.assertEqual('004893642', output[0]['in_z35_rec_key'])
예제 #7
0
    def test_identity_processor(self):
        """Check that IdentityProcessor hands rows from reader to writer
        unchanged, without adding any job_info keys."""
        first_key = '000007520'
        second_key = '000147967'
        rows = [
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': first_key,
            },
            {
                'rec_type_cd': 'D',
                'db_operation_cd': 'U',
                'rec_trigger_key': second_key,
            },
        ]

        source = ListReader(rows)
        sink = ListWriter()

        # A negative processing cycle id keeps real data in the tables from
        # interfering with the tests.
        cycle_id = -1

        info = {
            'em_create_dw_prcsng_cycle_id': cycle_id,
            'em_create_dw_job_exectn_id': 1,
            'em_create_dw_job_name': 'TEST',
            'em_create_dw_job_version_no': '0.0',
            'em_create_user_id': 'test_user',
            'em_create_tmstmp': datetime.datetime.now(),
        }

        # The step is run without a logger in this test.
        IdentityProcessor(source, sink, info, None).execute()
        output = sink.list

        self.assertEqual(len(rows), len(output))
        self.assertEqual(first_key, output[0]['rec_trigger_key'])
        self.assertEqual(second_key, output[1]['rec_trigger_key'])

        # No job_info keys are expected: the processor only forwards data.
        # This comparison is order-sensitive, unlike the sorted comparisons
        # used by the other tests.
        wanted = ['rec_type_cd', 'db_operation_cd', 'rec_trigger_key']
        self.assertEqual(wanted, list(output[0].keys()))
예제 #8
0
    def test_marc_rec_field_seq_no(self):
        """
        Check that the sequence number counts up while consecutive rows share
        a z00_doc_number and starts again at 1 when a new z00_doc_number
        arrives.
        """
        repeated_doc = '000025252'
        next_doc = '000090849'
        rows = [
            {
                'z00_doc_number': repeated_doc,
                'z00_marc_rec_field_cd': 'FMT',
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': 'BK',
            },
            {
                'z00_doc_number': repeated_doc,
                'z00_marc_rec_field_cd': 'LDR',
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': '^^^^^cam^^2200493^^^4500',
            },
            {
                'z00_doc_number': next_doc,
                'z00_marc_rec_field_cd': 'FMT',
                'UNUSED': 'L',
                'z00_marc_rec_field_txt': 'BK',
            },
        ]
        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # The step is run without a logger in this test.
        LoadZ00FieldTsv(source, sink, info, None).execute()
        output = sink.list

        self.assertEqual(len(rows), len(output))

        # Two fields of the first document, then the first field of the next.
        seq_key = 'dw_stg_1_marc_rec_field_seq_no'
        self.assertEqual(1, output[0][seq_key])
        self.assertEqual(2, output[1][seq_key])
        self.assertEqual(1, output[2][seq_key])
    def test_end_to_end(self):
        """Run EzproxyReportingFactProcessor on the class-level sample data
        and check the first output row: all em_update_* values are None, the
        create job name is the processor's, and the key set is as expected."""
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        source = ListReader(self.sample_data)
        processor = EzproxyReportingFactProcessor(
            source, sink, info, self.logger)
        processor.execute()

        output = processor.writer.list

        wanted = [
            'em_create_dw_job_exectn_id', 'em_create_dw_job_name',
            'em_create_dw_job_version_no', 'em_create_dw_prcsng_cycle_id',
            'em_create_tmstmp', 'em_create_user_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_dw_prcsng_cycle_id',
            'em_update_reason_txt', 'em_update_tmstmp', 'em_update_user_id',
            'ezp_sessns_snap_actv_sessns_cnt',
            'ezp_sessns_snap_clndr_dt_dim_key', 'ezp_sessns_snap_fact_key',
            'ezp_sessns_snap_mbr_lbry_dim_key',
            'ezp_sessns_snap_time_of_day_dim_key', 'ezp_sessns_snap_tmstmp',
            'rm_current_rec_flag', 'rm_rec_effective_from_dt',
            'rm_rec_effective_to_dt', 'rm_rec_type_cd', 'rm_rec_type_desc',
            'rm_rec_version_no',
        ]
        wanted.sort()

        # The update-tracking columns are expected to be empty on this row.
        unset_columns = (
            'em_update_dw_job_exectn_id',
            'em_update_dw_job_name',
            'em_update_dw_job_version_no',
            'em_update_dw_prcsng_cycle_id',
            'em_update_reason_txt',
            'em_update_tmstmp',
            'em_update_user_id',
        )
        for column in unset_columns:
            self.assertEqual(None, output[0][column])

        self.assertEqual('EzproxyReportingFactProcessor',
                         output[0]['em_create_dw_job_name'])

        self.assertEqual(wanted, sorted(output[0].keys()))
예제 #10
0
    def test_end_to_end(self):
        """Run EzproxyProcessor on the class-level sample data and check the
        key set of the first output row."""
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')
        source = ListReader(self.sample_data)
        processor = EzproxyProcessor(source, sink, info, self.logger)
        processor.execute()

        output = processor.writer.list

        wanted = [
            't1_ezp_sessns_snap_actv_sessns_cnt',
            't1_ezp_sessns_snap_tmstmp__ezp_sessns_snap_clndr_dt_dim_key',
            't1_ezp_sessns_virtual_hosts_cnt',
            't1_mbr_lbry_cd__ezp_sessns_snap_mbr_lbry_dim_key',
            't2_ezp_sessns_snap_tmstmp__ezp_sessns_snap_tmstmp',
            't3_ezp_sessns_snap_tmstmp__ezp_sessns_snap_time_of_day_dim_key',
            'em_create_user_id', 'em_create_dw_prcsng_cycle_id',
            'em_create_dw_job_exectn_id', 'em_create_dw_job_version_no',
            'em_create_dw_job_name', 'em_create_tmstmp',
            'in_ezp_sessns_snap_tmstmp', 'in_mbr_lbry_cd',
        ]
        wanted.sort()

        self.assertEqual(wanted, sorted(output[0].keys()))
예제 #11
0
    def test_simple_array(self):
        """Check that a ListReader over a two-item list yields the items in
        order across two successive next(iter(reader)) calls."""
        lines = ['Line 1', 'Line 2']

        reader = ListReader(lines)
        # iter() is requested afresh for each item, so the second call only
        # returns the second line if the reader keeps its position between
        # iter() calls.
        first = next(iter(reader))
        self.assertEqual(lines[0], first)
        second = next(iter(reader))
        self.assertEqual(lines[1], second)
예제 #12
0
    def test_copy_mai50_z30_stage1_to_stage2(self):
        """Check that CopyStage1ToStage2 for 'mai50' writes one row per z30
        input row, with 'in_'-prefixed source columns whose values (including
        padding spaces) are unchanged, the library name, and the processor's
        own job name."""
        # Sample from dw_stg_1_mai50_z30
        stage1_row = {
            '_sa_instance_state': '',
            'db_operation_cd': 'U',
            'em_create_dw_job_exectn_id': 1,
            'em_create_dw_job_name': 'CreateFileEquivalentTable',
            'em_create_dw_job_version_no': '0.0',
            'em_create_dw_prcsng_cycle_id': self.job_info.prcsng_cycle_id,
            'em_create_tmstmp': 'datetime.datetime(2019, 9, 17, 8, 53, 56, 619908)',
            'em_create_user_id': 'test_user',
            'rec_trigger_key': '000000084000120',
            'rec_type_cd': 'D',
            'z30_85x_type': ' ',
            'z30_alpha': 'L',
            'z30_arrival_date': '00000000',
            'z30_barcode': '31430001459330                ',
            'z30_call_no': '$$hE185$$i.M43 1969a',
            'z30_call_no_2': '',
            'z30_call_no_2_key': '                                                                                ',
            'z30_call_no_2_type': ' ',
            'z30_call_no_key': 'E3185.-M43--41969-A                                                             ',
            'z30_call_no_type': '0',
            'z30_cataloger': '          ',
            'z30_chronological_i': '',
            'z30_chronological_j': '',
            'z30_chronological_k': '',
            'z30_chronological_l': '',
            'z30_chronological_m': '',
            'z30_collection': 'HOLD ',
            'z30_copy_id': '     ',
            'z30_date_last_return': '20190913',
            'z30_depository_id': '     ',
            'z30_description': 'v.1',
            'z30_enumeration_a': '1',
            'z30_enumeration_b': '',
            'z30_enumeration_c': '',
            'z30_enumeration_d': '',
            'z30_enumeration_e': '',
            'z30_enumeration_f': '',
            'z30_enumeration_g': '',
            'z30_enumeration_h': '',
            'z30_expected_arrival_date': '00000000',
            'z30_gap_indicator': ' ',
            'z30_hol_doc_number_x': '001668394',
            'z30_hour_last_return': '1157',
            'z30_inventory_number': '',
            'z30_inventory_number_date': '00000000',
            'z30_ip_last_return': '128.8.44.58',
            'z30_ip_last_return_v6': '',
            'z30_issue_date': '00000000',
            'z30_item_process_status': '  ',
            'z30_item_statistic': '          ',
            'z30_item_status': '01',
            'z30_last_shelf_report_date': '00000000',
            'z30_linking_number': '000000000',
            'z30_maintenance_count': '000',
            'z30_material': 'BOOK ',
            'z30_no_loans': '010',
            'z30_note_circulation': '',
            'z30_note_internal': '',
            'z30_note_opac': '',
            'z30_on_shelf_date': '00000000',
            'z30_on_shelf_seq': '000000',
            'z30_open_date': '00000000',
            'z30_order_number': '',
            'z30_pages': '',
            'z30_price': '          ',
            'z30_process_status_date': '00000000',
            'z30_rec_key': '000000084000120',
            'z30_rec_key_2': '0000000000000000000',
            'z30_rec_key_3': '                                   00000',
            'z30_shelf_report_number': '                    ',
            'z30_sub_library': 'CPOSS',
            'z30_supp_index_o': '',
            'z30_temp_location': 'N',
            'z30_upd_time_stamp': '201909131157258',
            'z30_update_date': '00000000',
        }
        rows = [stage1_row]

        library = 'mai50'
        source = ListReader(rows)
        processor = CopyStage1ToStage2.create(
            source, self.writer, self.job_info, self.logger, library)
        processor.execute()
        output = self.writer.list

        self.assertEqual(len(rows), len(output))

        # Every z30_* input column is expected with an 'in_' prefix, next to
        # the operation code, library name and job-tracking columns.
        wanted = sorted([
            'db_operation_cd',
            'dw_stg_2_aleph_lbry_name',
            'in_z30_85x_type',
            'in_z30_alpha',
            'in_z30_arrival_date',
            'in_z30_barcode',
            'in_z30_call_no',
            'in_z30_call_no_2',
            'in_z30_call_no_2_key',
            'in_z30_call_no_2_type',
            'in_z30_call_no_key',
            'in_z30_call_no_type',
            'in_z30_cataloger',
            'in_z30_chronological_i',
            'in_z30_chronological_j',
            'in_z30_chronological_k',
            'in_z30_chronological_l',
            'in_z30_chronological_m',
            'in_z30_collection',
            'in_z30_copy_id',
            'in_z30_date_last_return',
            'in_z30_depository_id',
            'in_z30_description',
            'in_z30_enumeration_a',
            'in_z30_enumeration_b',
            'in_z30_enumeration_c',
            'in_z30_enumeration_d',
            'in_z30_enumeration_e',
            'in_z30_enumeration_f',
            'in_z30_enumeration_g',
            'in_z30_enumeration_h',
            'in_z30_expected_arrival_date',
            'in_z30_gap_indicator',
            'in_z30_hol_doc_number_x',
            'in_z30_hour_last_return',
            'in_z30_inventory_number',
            'in_z30_inventory_number_date',
            'in_z30_ip_last_return',
            'in_z30_ip_last_return_v6',
            'in_z30_issue_date',
            'in_z30_item_process_status',
            'in_z30_item_statistic',
            'in_z30_item_status',
            'in_z30_last_shelf_report_date',
            'in_z30_linking_number',
            'in_z30_maintenance_count',
            'in_z30_material',
            'in_z30_no_loans',
            'in_z30_note_circulation',
            'in_z30_note_internal',
            'in_z30_note_opac',
            'in_z30_on_shelf_date',
            'in_z30_on_shelf_seq',
            'in_z30_open_date',
            'in_z30_order_number',
            'in_z30_pages',
            'in_z30_price',
            'in_z30_process_status_date',
            'in_z30_rec_key',
            'in_z30_rec_key_2',
            'in_z30_rec_key_3',
            'in_z30_shelf_report_number',
            'in_z30_sub_library',
            'in_z30_supp_index_o',
            'in_z30_temp_location',
            'in_z30_upd_time_stamp',
            'in_z30_update_date',
            'em_create_dw_prcsng_cycle_id',
            'em_create_dw_job_exectn_id',
            'em_create_dw_job_name',
            'em_create_dw_job_version_no',
            'em_create_user_id',
            'em_create_tmstmp',
        ])
        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))

        self.assertEqual('U', first['db_operation_cd'])
        self.assertEqual('$$hE185$$i.M43 1969a', first['in_z30_call_no'])
        # Padded values are expected to come through with their spaces intact.
        self.assertEqual('31430001459330                ', first['in_z30_barcode'])
        self.assertEqual('                                   00000', first['in_z30_rec_key_3'])
        self.assertEqual('BOOK ', first['in_z30_material'])
        self.assertEqual('mai50', first['dw_stg_2_aleph_lbry_name'])
        # The job name on the output is the processor's, not the input row's.
        self.assertEqual(processor.job_name(), first['em_create_dw_job_name'])
예제 #13
0
    def test_bib_rec_preprocess(self):
        """
        Run Preprocess on a bib record whose values have no surrounding
        whitespace and check the output key set and the z00 values.
        """
        record = {
            # pk data
            'db_operation_cd': 'U',
            'dw_stg_2_aleph_lbry_name': 'mai60',
            'em_create_dw_prcsng_cycle_id': '-1',
            # z00 don't have trims
            'in_z00_doc_number': '000019087',
            'in_z00_no_lines': '0011',
            'in_z00_data_len': '000400',
            # z13 has trims
            'in_z13_title': 'A literary history of America',
            'in_z13_author': 'Wendell, Barrett, 1855-1921',
            'in_z13_imprint': 'New York, Haskell House Publishers, 1968',
        }
        rows = [record]

        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # Two kinds of preprocessing entry are used below; each field gets its
        # own copy so no dict is shared between config entries.
        no_action = {
            "pre_or_post_dq": "N/A",
            "pre_action": "N/A",
            "pre_detailed_instructions": "N/A",
        }
        trim_action = {
            "pre_or_post_dq": "N/A",
            "pre_action": "Trim",
            "pre_detailed_instructions": "Remove leading and trailing spaces",
        }
        # NOTE(review): 'in_z00_data_len' is the only config key carrying the
        # 'in_' prefix; the other keys omit it. Kept as-is, confirm intent.
        config = {
            'z00_doc_number': {"preprocessing_info": dict(no_action)},
            'z00_no_lines': {"preprocessing_info": dict(no_action)},
            'in_z00_data_len': {"preprocessing_info": dict(no_action)},
            'z13_title': {"preprocessing_info": dict(trim_action)},
            'z13_author': {"preprocessing_info": dict(trim_action)},
            'z13_imprint': {"preprocessing_info": dict(trim_action)},
        }

        primary_keys = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id',
        ]

        # The step is run without a logger in this test.
        step = Preprocess(source, sink, info, None, config, primary_keys)
        step.execute()
        output = step.writer.list

        wanted = [
            'in_z00_doc_number', 'pp_z00_doc_number',
            'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
            'pp_z13_title', 'pp_z13_author', 'pp_z00_data_len',
            'pp_z13_imprint', 'em_update_dw_prcsng_cycle_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_user_id',
            'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id',
        ]
        wanted.sort()

        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))
        self.assertEqual("000019087", first['pp_z00_doc_number'])
        self.assertEqual('0011', first['pp_z00_no_lines'])
        self.assertEqual('000400', first['pp_z00_data_len'])
예제 #14
0
    def test_z00_pp(self):
        """Run Preprocess on a z00 record and check that need_preprocess is
        False for an empty field name, that the output key set is as
        expected, and that the z00 values are carried into pp_* columns."""
        record = {
            'db_operation_cd': 'U',
            'in_z00_data': '',
            'in_z00_data_len': '001726',
            'in_z00_doc_number': '000181506',
            'in_z00_no_lines': '0038',
            'dw_stg_2_aleph_lbry_name': 'mai01',
            'em_create_dw_prcsng_cycle_id': '-1',
        }
        rows = [record]

        source = ListReader(rows)
        sink = ListWriter()
        info = JobInfo(-1, 'test_user', '1', '1')

        # None of the configured fields asks for a preprocessing action; each
        # entry gets its own copy so no dict is shared between them.
        no_action = {
            "pre_or_post_dq": "N/A",
            "pre_action": "N/A",
            "pre_detailed_instructions": "N/A",
        }
        config = {
            'z00_doc_number': {"preprocessing_info": dict(no_action)},
            'z00_no_lines': {"preprocessing_info": dict(no_action)},
            'z00_data_len': {"preprocessing_info": dict(no_action)},
        }

        primary_keys = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id',
        ]

        # The step is run without a logger in this test.
        step = Preprocess(source, sink, info, None, config, primary_keys)
        step.execute()
        output = step.writer.list

        # 'pp_z00_data' is expected even though 'z00_data' has no config entry.
        wanted = [
            'in_z00_doc_number', 'pp_z00_doc_number',
            'dw_stg_2_aleph_lbry_name', 'db_operation_cd', 'pp_z00_no_lines',
            'pp_z00_data_len', 'pp_z00_data', 'em_update_dw_prcsng_cycle_id',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_user_id',
            'em_update_tmstmp', 'em_create_dw_prcsng_cycle_id',
        ]
        wanted.sort()

        self.assertEqual(False, Preprocess.need_preprocess(config, ''))
        first = output[0]
        self.assertEqual(wanted, sorted(first.keys()))
        self.assertEqual("000181506", first['pp_z00_doc_number'])
        self.assertEqual("0038", first['pp_z00_no_lines'])
        self.assertEqual("001726", first['pp_z00_data_len'])
        self.assertEqual("", first['pp_z00_data'])
예제 #15
0
    def test_dataquality_bib_rec(self):
        """Run DataQualityProcessor with the z00 and then the z13 primary-key
        list and check the emitted key sets and data-quality values.

        Both runs are given the same ListWriter. The indices used below (0-1
        for z00 rows, 3-5 for z13 rows) presumably address that writer's list
        after both runs have appended to it -- confirm against ListWriter.
        """
        writer = ListWriter()
        job_info = JobInfo(-1, 'test_user', '1', '1')
        json_config = self.bib_rec_json_config

        z00_pk_list = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z00_doc_number',
            'em_create_dw_prcsng_cycle_id'
        ]
        z13_pk_list = [
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
            'em_create_dw_prcsng_cycle_id'
        ]

        # z00
        reader = ListReader(self.bib_record_dimension_sample_data_z00)
        data_quality_processor = DataQualityProcessor(reader, writer, job_info,
                                                      self.logger, json_config,
                                                      z00_pk_list)
        data_quality_processor.execute()
        z00_results = data_quality_processor.writer.list

        # z13
        # NOTE(review): this pass reads the same *_z00 fixture as the z00
        # pass; confirm whether a separate z13 fixture was intended.
        reader = ListReader(self.bib_record_dimension_sample_data_z00)
        data_quality_processor = DataQualityProcessor(reader, writer, job_info,
                                                      self.logger, json_config,
                                                      z13_pk_list)
        data_quality_processor.execute()
        z13_results = data_quality_processor.writer.list

        # The value assertions index z00 and z13 rows through one name, so
        # bind it to the writer's list as it stands after both passes.
        results = z13_results

        z00_expected_keys = sorted([
            'db_operation_cd', 'dq_z00_data', 'dq_z00_data_len',
            'dq_z00_doc_number', 'dq_z00_no_lines', 'dw_stg_2_aleph_lbry_name',
            'em_update_dw_job_exectn_id', 'em_update_dw_job_name',
            'em_update_dw_job_version_no', 'em_update_dw_prcsng_cycle_id',
            'em_update_tmstmp', 'em_update_user_id', 'in_z00_doc_number',
            'rm_dq_check_excptn_cnt', 'rm_suspend_rec_flag',
            'rm_suspend_rec_reason_cd'
        ])
        z13_expected_keys = sorted([
            'db_operation_cd', 'dw_stg_2_aleph_lbry_name', 'in_z13_rec_key',
            'dq_z13_year', 'dq_z13_open_date', 'dq_z13_update_date',
            'dq_z13_author', 'dq_z13_title', 'em_update_dw_prcsng_cycle_id',
            'em_update_user_id', 'em_update_dw_job_exectn_id',
            'em_update_dw_job_version_no', 'em_update_dw_job_name',
            'em_update_tmstmp', 'rm_dq_check_excptn_cnt',
            'rm_suspend_rec_flag', 'rm_suspend_rec_reason_cd'
        ])

        self.assertEqual(z00_expected_keys, sorted(z00_results[0].keys()))
        self.assertEqual(z00_expected_keys, sorted(z00_results[1].keys()))
        self.assertEqual(z13_expected_keys, sorted(z13_results[3].keys()))
        self.assertEqual(z13_expected_keys, sorted(z13_results[5].keys()))

        self.assertEqual("SUS", results[0]['dq_z00_doc_number'])
        self.assertEqual(1, results[0]['rm_dq_check_excptn_cnt'])
        self.assertEqual("MIS", results[0]['rm_suspend_rec_reason_cd'])

        # NOTE(review): the reason-code checks in the next two groups index
        # rows 0 and 1 although the surrounding checks index rows 3 and 4;
        # indices are kept as written -- confirm which rows were meant.
        self.assertEqual(None, results[3]['dq_z13_open_date'])
        self.assertEqual(1, results[3]['rm_dq_check_excptn_cnt'])
        self.assertEqual("MIS", results[0]['rm_suspend_rec_reason_cd'])

        self.assertEqual(None, results[4]['dq_z13_open_date'])
        self.assertEqual(1, results[4]['rm_dq_check_excptn_cnt'])
        self.assertEqual("LEN", results[1]['rm_suspend_rec_reason_cd'])

        self.assertEqual('0049', results[0]['dq_z00_no_lines'])
        self.assertEqual('001970', results[0]['dq_z00_data_len'])
        self.assertEqual('20130225', results[5]['dq_z13_update_date'])
        self.assertEqual('1969', results[5]['dq_z13_year'])

        # Row 5 is asserted above to have exactly z13_expected_keys, which has
        # 'dq_z13_open_date' and no 'pp_z13_open_date', so the dq_ column is
        # read here. NOTE(review): confirm the expected value.
        self.assertEqual('20021124', results[5]['dq_z13_open_date'])