Ejemplo n.º 1
0
def get_asmt_rec_id(guid_batch, tenant_name, asmt_rec_id_info):
    '''
    Returns asmt_rec_id from dim_asmt table
    Steps:
    1. Get guid_asmt from integration table INT_SBAC_ASMT
    2. Select asmt_rec_id from dim_asmt by the same guid_amst got from 1. It should have 1 value
    '''
    source_table_name = asmt_rec_id_info['source_table']
    guid_column_name_in_source = asmt_rec_id_info['guid_column_in_source']
    target_table_name = asmt_rec_id_info['target_table']
    guid_column_name_in_target = asmt_rec_id_info['guid_column_name']
    rec_id_column_name = asmt_rec_id_info['rec_id']

    # connect to integration table, to get the value of guid_asmt
    with get_udl_connection() as udl_conn:
        int_table = udl_conn.get_table(source_table_name)
        query = select([int_table.c[guid_column_name_in_source]], from_obj=int_table, limit=1)
        query = query.where(int_table.c['guid_batch'] == guid_batch)
        results = udl_conn.get_result(query)
        if results:
            guid_column_value = results[0][guid_column_name_in_source]

    # connect to target table, to get the value of asmt_rec_id
    with get_target_connection(tenant_name, guid_batch) as target_conn:
        dim_asmt = target_conn.get_table(target_table_name)
        query = select([dim_asmt.c[rec_id_column_name]], from_obj=dim_asmt, limit=1)
        query = query.where(dim_asmt.c[guid_column_name_in_target] == guid_column_value)
        query = query.where(and_(dim_asmt.c['batch_guid'] == guid_batch))
        results = target_conn.get_result(query)
        if results:
            asmt_rec_id = results[0][rec_id_column_name]

    return asmt_rec_id
Ejemplo n.º 2
0
    def get_edware_asmt_score_avgs(self, tenant, schema):
        with get_target_connection(tenant, schema) as conn:
            fact = conn.get_table('fact_asmt_outcome_vw')
            query = select([
                func.avg(fact.c.asmt_score),
                func.avg(fact.c.asmt_score_range_min),
                func.avg(fact.c.asmt_score_range_max),
                func.avg(fact.c.asmt_claim_1_score),
                func.avg(fact.c.asmt_claim_1_score_range_min),
                func.avg(fact.c.asmt_claim_1_score_range_max),
                func.avg(fact.c.asmt_claim_2_score),
                func.avg(fact.c.asmt_claim_2_score_range_min),
                func.avg(fact.c.asmt_claim_2_score_range_max),
                func.avg(fact.c.asmt_claim_3_score),
                func.avg(fact.c.asmt_claim_3_score_range_min),
                func.avg(fact.c.asmt_claim_3_score_range_max),
                func.avg(fact.c.asmt_claim_4_score),
                func.avg(fact.c.asmt_claim_4_score_range_min),
                func.avg(fact.c.asmt_claim_4_score_range_max)
            ],
                           from_obj=fact)
            result = conn.execute(query)
            for row in result:
                star_asmt_avgs = row

            return star_asmt_avgs
def handle_duplicates_in_dimensions(tenant_name, guid_batch):
    """Handle duplicate records in dimensions by marking them as deleted


    Steps:
    1. Soft delete records (Mark rec_status as 'S') in pre-prod dimensions that are already existing in production database
       The match is done based on natural_key columns of the table and some other key columns listed
    2. Update the rec_id of the record marked for delete with the id of the matching record found in prod. This is needed for
       step which updates foreign keys in fact asmt outcome

    :param tenant_name: tenant name, to get target database connection
    :param guid_batch:  batch buid

    @return: Number of affected rows
    """
    affected_rows = 0
    with get_target_connection(tenant_name,
                               guid_batch) as target_conn, get_prod_connection(
                                   tenant_name) as prod_conn:

        tables = get_tables_starting_with(target_conn.get_metadata(),
                                          Constants.DIM_TABLES_PREFIX)
        for table_name in tables:
            target_db_helper = HandleUpsertHelper(target_conn, guid_batch,
                                                  table_name)
            prod_db_helper = HandleUpsertHelper(prod_conn, guid_batch,
                                                table_name)
            for record in target_db_helper.find_all():
                matched = prod_db_helper.find_by_natural_key(record)
                if not matched:
                    continue
                # soft delete the record and set its pk as the pk of the matched record
                target_db_helper.soft_delete_and_update(record, matched)
                affected_rows += 1
    return affected_rows
 def validate_udl_stats(self):
     with StatsDBConnection() as conn:
         table = conn.get_table('udl_stats')
         query = select([table.c.load_status])
         result = conn.execute(query).fetchall()
         expected_result = [('migrate.ingested', ), ('migrate.failed', )]
         # Validate that one migration batch is failed and one is successful.
         self.assertEquals(result, expected_result)
         # Validate that schema clean up is successful for both the scenario: Migration.ingested and migration.failed
         schema_name = select([table.c.batch_guid])
         schema_name_result = conn.execute(schema_name).fetchall()
         tpl_schema_name_1 = schema_name_result[0]
         tpl_schema_name_2 = schema_name_result[1]
         schema_name_1 = tpl_schema_name_1[0]
         schema_name_2 = tpl_schema_name_2[0]
         with get_target_connection(self.tenant) as connector:
             query = select(['schema_name'],
                            from_obj=['information_schema.schemata'
                                      ]).where('schema_name in :a')
             params = [bindparam('a', (schema_name_1, schema_name_2))]
             new_query = text(str(query), bindparams=params)
             result = connector.execute(new_query).fetchall()
             self.assertEqual(
                 len(result), 0,
                 "Schema clean up after migration is unsuccessful")
def explode_data_to_fact_table(conf, source_table, target_table,
                               column_mapping, column_types):
    '''
    Main function to explode data from integration table INT_SBAC_ASMT_OUTCOME to star schema table fact_asmt_outcome_vw
    The basic steps are:
    0. Get two foreign keys: asmt_rec_id and student_rec_id
    1. Insert data from INT_SBAC_ASMT_OUTCOME to fact_asmt_outcome_vw. But for columns inst_hier_rec_id and student_rec_id ,
       put the temporary value as -1
    2. Update foreign key inst_hier_rec_id by comparing district_id, school_id and state_code
    3. Update foreign key student_rec_id by comparing student_id, batch_guid
    '''
    with get_target_connection(conf[mk.TENANT_NAME],
                               conf[mk.TARGET_DB_SCHEMA]) as conn:
        # Drops FK
        conn.execute(
            enable_trigger_query(conf[mk.TARGET_DB_SCHEMA], target_table,
                                 False))

        # create insertion insert_into_fact_table_query
        result = conn.execute(
            create_insert_query(conf, source_table, target_table,
                                column_mapping, column_types, False))
        affected_rows = result.rowcount

        # update fact table's foreign key columns' id
        fact_table = conn.get_table(target_table)
        for fk in edschema_util.get_foreign_key_reference_columns(fact_table):
            for query in update_foreign_rec_id_query(fk):
                result = conn.execute(query)

    return affected_rows
Ejemplo n.º 6
0
def explode_data_to_fact_table(conf, source_table, target_table, column_mapping, column_types):
    '''
    Main function to explode data from integration table INT_SBAC_ASMT_OUTCOME to star schema table fact_asmt_outcome_vw
    The basic steps are:
    0. Get two foreign keys: asmt_rec_id and student_rec_id
    1. Insert data from INT_SBAC_ASMT_OUTCOME to fact_asmt_outcome_vw. But for columns inst_hier_rec_id and student_rec_id ,
       put the temporary value as -1
    2. Update foreign key inst_hier_rec_id by comparing district_id, school_id and state_code
    3. Update foreign key student_rec_id by comparing student_id, batch_guid
    '''
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.TARGET_DB_SCHEMA]) as conn:
        # Drops FK
        conn.execute(enable_trigger_query(conf[mk.TARGET_DB_SCHEMA], target_table, False))

        # create insertion insert_into_fact_table_query
        result = conn.execute(create_insert_query(conf, source_table, target_table, column_mapping, column_types, False))
        affected_rows = result.rowcount

        # update fact table's foreign key columns' id
        fact_table = conn.get_table(target_table)
        for fk in edschema_util.get_foreign_key_reference_columns(fact_table):
            for query in update_foreign_rec_id_query(fk):
                result = conn.execute(query)

    return affected_rows
def explode_data_to_dim_table(conf, source_table, target_table, column_mapping,
                              column_types):
    '''
    Main function to move data from source table to target tables.
    Source table can be INT_SBAC_ASMT, and INT_SBAC_ASMT_OUTCOME. Target table can be any dim tables in star schema.
    @param conf: one dictionary which has database settings, and guid_batch
    @param source_table: name of the source table where the data comes from
    @param target_table: name of the target table where the data should be moved to
    @param column_mapping: list of tuple of:
                           column_name_in_target, column_name_in_source
    @param column_types: data types of all columns in one target table
    '''
    # create database connection to target
    with get_target_connection(conf[mk.TENANT_NAME],
                               conf[mk.GUID_BATCH]) as conn:
        # create insertion query
        # TODO: find out if the affected rows, time can be returned, so that the returned info can be put in the log
        # send only data that is needed to be inserted (such insert, update) to dimenstion table
        query = create_insert_query(
            conf, source_table, target_table, column_mapping, column_types,
            True, 'C' if source_table in op_table_conf else None)

        logger.info(compile_query_to_sql_text(query))

        # execute the query
        affected_rows = execute_udl_queries(
            conn, [query],
            'Exception -- exploding data from integration to target ' +
            '{target_table}'.format(target_table=target_table),
            'move_to_target', 'explode_data_to_dim_table')

    return affected_rows
Ejemplo n.º 8
0
def explode_data_to_dim_table(conf, source_table, target_table, column_mapping, column_types):
    '''
    Main function to move data from source table to target tables.
    Source table can be INT_SBAC_ASMT, and INT_SBAC_ASMT_OUTCOME. Target table can be any dim tables in star schema.
    @param conf: one dictionary which has database settings, and guid_batch
    @param source_table: name of the source table where the data comes from
    @param target_table: name of the target table where the data should be moved to
    @param column_mapping: list of tuple of:
                           column_name_in_target, column_name_in_source
    @param column_types: data types of all columns in one target table
    '''
    # create database connection to target
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.GUID_BATCH]) as conn:
        # create insertion query
        # TODO: find out if the affected rows, time can be returned, so that the returned info can be put in the log
        # send only data that is needed to be inserted (such insert, update) to dimenstion table
        query = create_insert_query(conf, source_table, target_table, column_mapping, column_types, True,
                                    'C' if source_table in op_table_conf else None)

        logger.info(compile_query_to_sql_text(query))

        # execute the query
        affected_rows = execute_udl_queries(conn, [query],
                                            'Exception -- exploding data from integration to target ' +
                                            '{target_table}'.format(target_table=target_table),
                                            'move_to_target', 'explode_data_to_dim_table')

    return affected_rows
Ejemplo n.º 9
0
def handle_duplicates_in_dimensions(tenant_name, guid_batch):
    """Handle duplicate records in dimensions by marking them as deleted


    Steps:
    1. Soft delete records (Mark rec_status as 'S') in pre-prod dimensions that are already existing in production database
       The match is done based on natural_key columns of the table and some other key columns listed
    2. Update the rec_id of the record marked for delete with the id of the matching record found in prod. This is needed for
       step which updates foreign keys in fact asmt outcome

    :param tenant_name: tenant name, to get target database connection
    :param guid_batch:  batch buid

    @return: Number of affected rows
    """
    affected_rows = 0
    with get_target_connection(tenant_name, guid_batch) as target_conn, get_prod_connection(tenant_name) as prod_conn:

        tables = get_tables_starting_with(target_conn.get_metadata(), Constants.DIM_TABLES_PREFIX)
        for table_name in tables:
            target_db_helper = HandleUpsertHelper(target_conn, guid_batch, table_name)
            prod_db_helper = HandleUpsertHelper(prod_conn, guid_batch, table_name)
            for record in target_db_helper.find_all():
                matched = prod_db_helper.find_by_natural_key(record)
                if not matched:
                    continue
                # soft delete the record and set its pk as the pk of the matched record
                target_db_helper.soft_delete_and_update(record, matched)
                affected_rows += 1
    return affected_rows
Ejemplo n.º 10
0
def move_data_from_int_tables_to_target_table(conf, task_name, source_tables,
                                              target_table):
    """Move student registration data from source integration tables to target table.
    Source tables are INT_STU_REG and INT_STU_REG_META. Target table is student_registration.

    @param conf: Configuration for particular load type (assessment or studentregistration)
    @param task_name: Name of the celery task invoking this method
    @param source_tables: Names of the source tables from where the data comes
    @param target_table: Name of the target table to where the data should be moved

    @return: Number of inserted rows
    """
    column_and_type_mapping = get_column_and_type_mapping(
        conf, task_name, target_table, source_tables)

    with get_target_connection(conf[mk.TENANT_NAME],
                               conf[mk.GUID_BATCH]) as conn_to_target_db:
        insert_query = create_sr_table_select_insert_query(
            conf, target_table, column_and_type_mapping)
        logger.info(insert_query)
        affected_rows = execute_udl_queries(
            conn_to_target_db, [insert_query],
            'Exception -- moving data from integration {int_table} to target {target_table}'
            .format(int_table=source_tables[0], target_table=target_table),
            'move_to_target', 'move_data_from_int_tables_to_target_table')

    return affected_rows
Ejemplo n.º 11
0
    def get_star_schema_demographic_counts(self, tenant, schema):
        demographics = [
            'dmg_eth_hsp', 'dmg_eth_ami', 'dmg_eth_asn', 'dmg_eth_blk',
            'dmg_eth_pcf', 'dmg_eth_wht', 'dmg_eth_2om', 'dmg_prg_iep',
            'dmg_prg_lep', 'dmg_prg_504', 'dmg_sts_ecd', 'dmg_sts_mig'
        ]
        results_dict = {}
        with get_target_connection(tenant, schema) as conn:
            fact = conn.get_table('fact_asmt_outcome_vw')
            for entry in demographics:
                demo_query = select(
                    [func.count(fact.c[entry])],
                    from_obj=fact).where(fact.c[entry] == true())
                result = conn.execute(demo_query)
                for row in result:
                    demo_count = row[0]

                results_dict[entry] = demo_count

                #get derived ethnicity
                eth_query = select(
                    [func.count(fact.c.dmg_eth_derived)],
                    from_obj=fact).where(fact.c.dmg_eth_derived is not None)
                result = conn.execute(eth_query)
                for row in result:
                    derived_count = row[0]
                results_dict['dmg_eth_derived'] = derived_count

        return results_dict
    def test_1_load_int_to_star(self):
        self.load_int_sbac_asmt()
        self.load_int_sbac_asmt_outcome()

        # explode to dim tables

        table_map, column_map = move_to_target_setup.get_table_and_column_mapping(
            self.conf, self.load_to_dim_task_name, self.dim_table_prefix)
        for target in table_map.keys():
            target_columns = column_map[target]
            column_types = move_to_target_setup.get_table_column_types(
                self.conf, target, list(target_columns.keys()))
            move_to_target.explode_data_to_dim_table(self.conf,
                                                     table_map[target], target,
                                                     target_columns,
                                                     column_types)

        # explode to fact table
        table_map, column_map = move_to_target_setup.get_table_and_column_mapping(
            self.conf, self.load_to_fact_task_name, self.fact_table_prefix)

        for fact_table, source_table in table_map.items():
            column_types = move_to_target_setup.get_table_column_types(
                self.conf, fact_table, list(column_map[fact_table].keys()))
            move_to_target.explode_data_to_fact_table(self.conf, source_table,
                                                      fact_table,
                                                      column_map[fact_table],
                                                      column_types)

        # handle deletion case
        move_to_target.handle_updates_and_deletes(self.conf)

        # check star schema table counts
        with get_target_connection(self.tenant_code, self.guid_batch) as conn:
            tables_to_check = {
                'dim_asmt': 1,
                'dim_inst_hier': 99,
                'dim_student': 94,
                'fact_asmt_outcome_vw': 71
            }
            for entry in tables_to_check.keys():
                table = conn.get_table(entry)
                query = select([count()], from_obj=table)
                result = conn.execute(query)
                #self.assertEqual(int(result.fetchall()[0][0]), tables_to_check[entry])

        # check asmt score avgs
        int_asmt_avgs = self.get_integration_asmt_score_avgs()
        star_asmt_avgs = self.get_edware_asmt_score_avgs(
            self.tenant_code, self.guid_batch)

        self.assertEqual(int_asmt_avgs, star_asmt_avgs)

        # check demographic counts
        int_demo_dict = self.get_integration_demographic_counts()
        star_demo_dict = self.get_star_schema_demographic_counts(
            self.tenant_code, self.guid_batch)

        self.assertEqual(int_demo_dict, star_demo_dict)
Ejemplo n.º 13
0
def clean_up_unused_schemas():
    with get_target_connection("cat") as conn:
        schemas = conn.execute(
            "select schema_name from information_schema.schemata")
        for schema in schemas:
            schema_name = schema[0]
            if re.match('^\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$', schema_name):
                drop_schema_by_name(conn, schema_name)
 def test_update_pre_prod_for_records_to_be_deleted(self):
     with get_target_connection(MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch) as target_conn, get_prod_connection(MatchAndDeleteFTest.tenant_code) as prod_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(self.conf, target_conn, 'fact_asmt_outcome_vw')
         proxy_rows = move_to_target.yield_records_to_be_deleted(prod_conn, 'fact_asmt_outcome_vw', records_to_be_deleted, batch_size=10)
         for rows in proxy_rows:
             move_to_target.update_rec_id_for_records_to_delete(self.conf, target_conn, 'fact_asmt_outcome_vw', rows)
         mismatches = move_to_target.get_records_marked_for_deletion(self.conf, target_conn, 'fact_asmt_outcome_vw')
         self.assertEqual(0, len(mismatches))
 def test_match_all_records_to_be_deleted_with_prod(self):
     with get_target_connection(MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch) as target_conn, get_prod_connection(MatchAndDeleteFTest.tenant_code) as prod_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(self.conf, target_conn, 'fact_asmt_outcome_vw')
         proxy_rows = move_to_target.yield_records_to_be_deleted(prod_conn, 'fact_asmt_outcome_vw', records_to_be_deleted, batch_size=10)
         result = []
         for rows in proxy_rows:
             result.extend(rows)
         self.assertEqual(4, len(result))
 def count_rows(self, status=None):
     with get_target_connection(MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch) as conn:
         fact = conn.get_table('fact_asmt_outcome_vw')
         query = select([count(fact.c.asmt_outcome_vw_rec_id).label('count')], from_obj=fact)
         if status:
             query = query.where(fact.c.rec_status == status)
         result = conn.get_result(query)
         return int(result[0]['count'])
Ejemplo n.º 17
0
    def validate_edware_database(self, schema_name):
        with get_target_connection(self.tenant, schema_name) as ed_connector:
            fact_table = ed_connector.get_table('fact_asmt_outcome_vw')
            pre_prod_data = select([
                fact_table.c.student_rec_id, fact_table.c.inst_hier_rec_id,
                fact_table.c.asmt_rec_id
            ])
            pre_prod_table = ed_connector.execute(pre_prod_data).fetchall()
            self.expected_pk_val_for_student = pre_prod_table[0][0]
            self.expected_pk_val_for_inst = pre_prod_table[0][1]
            self.expected_pk_val_for_asmt = pre_prod_table[0][2]

            lean_table = ed_connector.get_table('fact_asmt_outcome')
            pre_prod_data = select([
                lean_table.c.student_rec_id, fact_table.c.inst_hier_rec_id,
                fact_table.c.asmt_rec_id
            ])
            pre_prod_table = ed_connector.execute(pre_prod_data).fetchall()
            self.lean_pk_val_for_student = pre_prod_table[0][0]
            self.lean_pk_val_for_inst = pre_prod_table[0][1]
            self.lean_pk_val_for_asmt = pre_prod_table[0][2]

            dim_student = ed_connector.get_table('dim_student')
            dim_student_data = select([dim_student.c.student_rec_id])
            result_student_id = ed_connector.execute(
                dim_student_data).fetchall()
            self.expected_student_id = result_student_id[0][0]

            dim_inst_hier = ed_connector.get_table('dim_inst_hier')
            dim_inst_hier_data = select([dim_inst_hier.c.inst_hier_rec_id])
            dim_inst_hier_result = ed_connector.execute(
                dim_inst_hier_data).fetchall()
            self.expected_inst_hier_rec_id = dim_inst_hier_result[0][0]

            dim_asmt = ed_connector.get_table('dim_asmt')
            dim_asmt_data = select([dim_asmt.c.asmt_rec_id])
            dim_asmt_result = ed_connector.execute(dim_asmt_data).fetchall()
            self.expected_asmt_rec_id = dim_asmt_result[0][0]

            self.assertEqual(
                self.expected_student_id, self.expected_pk_val_for_student,
                "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(
                self.expected_inst_hier_rec_id, self.expected_pk_val_for_inst,
                "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(
                self.expected_asmt_rec_id, self.expected_pk_val_for_asmt,
                "Error:student_rec_id of dim table not maching to fact table")

            self.assertEqual(
                self.expected_student_id, self.lean_pk_val_for_student,
                "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(
                self.expected_inst_hier_rec_id, self.lean_pk_val_for_inst,
                "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(
                self.expected_asmt_rec_id, self.lean_pk_val_for_asmt,
                "Error:student_rec_id of dim table not maching to fact table")
 def test_get_records_for_deletion(self):
     self.load_int_to_star()
     self.assertEqual(4, self.count_rows())
     with get_target_connection(
             MatchAndDeleteFTest.tenant_code,
             MatchAndDeleteFTest.guid_batch) as target_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(
             self.conf, target_conn, 'fact_asmt_outcome_vw')
         self.assertEqual(4, len(records_to_be_deleted))
Ejemplo n.º 19
0
 def validate_edware_database(self, schema_name):
     with get_target_connection('cat') as ed_connector:
         ed_connector.set_metadata_by_reflect(schema_name)
         edware_table = ed_connector.get_table(FACT_TABLE)
         output = select([edware_table.c.batch_guid]).where(edware_table.c.batch_guid == self.batch_id)
         output_data = ed_connector.execute(output).fetchall()
         row_count = len(output_data)
         self.assertGreater(row_count, 1, "Data is loaded to star shema")
         truple_str = (self.batch_id, )
         self.assertIn(truple_str, output_data, "assert successful")
 def count_rows(self, status=None):
     with get_target_connection(MatchAndDeleteFTest.tenant_code,
                                MatchAndDeleteFTest.guid_batch) as conn:
         fact = conn.get_table('fact_asmt_outcome_vw')
         query = select(
             [count(fact.c.asmt_outcome_vw_rec_id).label('count')],
             from_obj=fact)
         if status:
             query = query.where(fact.c.rec_status == status)
         result = conn.get_result(query)
         return int(result[0]['count'])
    def validate_edware_database(self, schema_name):
        with get_target_connection(self.tenant, schema_name) as ed_connector:
            fact_table_vw = ed_connector.get_table('fact_asmt_outcome_vw')
            prod_output_data = select([fact_table_vw.c.rec_status]).where(fact_table_vw.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce', )
            prod_output_table = ed_connector.execute(prod_output_data).fetchall()
            expected_status_val_D = [('D',)]
            self.assertEquals(prod_output_table, expected_status_val_D, 'Status is wrong in fact table for delete record')

            fact_table = ed_connector.get_table('fact_asmt_outcome')
            prod_data = select([fact_table.c.rec_status]).where(fact_table.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce', )
            prod_table = ed_connector.execute(prod_data).fetchall()
            self.assertEquals(prod_table, expected_status_val_D, 'Status is wrong in fact_asmt_table_primary for delete record')
    def test_1_load_int_to_star(self):
        self.load_int_sbac_asmt()
        self.load_int_sbac_asmt_outcome()

        # explode to dim tables

        table_map, column_map = move_to_target_setup.get_table_and_column_mapping(self.conf,
                                                                                  self.load_to_dim_task_name,
                                                                                  self.dim_table_prefix)
        for target in table_map.keys():
            target_columns = column_map[target]
            column_types = move_to_target_setup.get_table_column_types(self.conf, target, list(target_columns.keys()))
            move_to_target.explode_data_to_dim_table(self.conf, table_map[target], target, target_columns, column_types)

        # explode to fact table
        table_map, column_map = move_to_target_setup.get_table_and_column_mapping(self.conf,
                                                                                  self.load_to_fact_task_name,
                                                                                  self.fact_table_prefix)

        for fact_table, source_table in table_map.items():
            column_types = move_to_target_setup.get_table_column_types(self.conf,
                                                                       fact_table,
                                                                       list(column_map[fact_table].keys()))
            move_to_target.explode_data_to_fact_table(self.conf,
                                                      source_table,
                                                      fact_table,
                                                      column_map[fact_table],
                                                      column_types)

        # handle deletion case
        move_to_target.handle_updates_and_deletes(self.conf)

        # check star schema table counts
        with get_target_connection(self.tenant_code, self.guid_batch) as conn:
            tables_to_check = {'dim_asmt': 1, 'dim_inst_hier': 99, 'dim_student': 94, 'fact_asmt_outcome_vw': 71}
            for entry in tables_to_check.keys():
                table = conn.get_table(entry)
                query = select([count()], from_obj=table)
                result = conn.execute(query)
                #self.assertEqual(int(result.fetchall()[0][0]), tables_to_check[entry])

        # check asmt score avgs
        int_asmt_avgs = self.get_integration_asmt_score_avgs()
        star_asmt_avgs = self.get_edware_asmt_score_avgs(self.tenant_code, self.guid_batch)

        self.assertEqual(int_asmt_avgs, star_asmt_avgs)

        # check demographic counts
        int_demo_dict = self.get_integration_demographic_counts()
        star_demo_dict = self.get_star_schema_demographic_counts(self.tenant_code, self.guid_batch)

        self.assertEqual(int_demo_dict, star_demo_dict)
Ejemplo n.º 23
0
def handle_updates_and_deletes(conf):
    """Main handler for updates and deletes


    The method grabs tables to be processed for updates and deletes (Fact tables)
    and runs process_records_to_be_deleted for each table

    :param conf: udl configuration object
    """
    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.TARGET_DB_SCHEMA]) as target_conn, get_prod_connection(conf[mk.TENANT_NAME]) as prod_conn:
        tables = get_tables_starting_with(target_conn.get_metadata(), Constants.FACT_TABLES_PREFIX)
        for table_name in tables:
            process_records_to_be_deleted(conf, target_conn, prod_conn, table_name)
 def validate_edware_database(self):
     with get_target_connection('cat', self.guid_batch_id) as ed_connector:
         edware_table = ed_connector.get_table(FACT_TABLE)
         output = select([edware_table.c.batch_guid]).where(edware_table.c.batch_guid == self.guid_batch_id)
         output_val = select([edware_table.c.student_rec_id]).where(edware_table.c.batch_guid == self.guid_batch_id)
         output_data = ed_connector.execute(output).fetchall()
         output_data1 = ed_connector.execute(output_val).fetchall()
         # Velidate that student_rec_id not containing -1
         self.assertNotIn(-1, output_data1, "Student rec id in not -1 in fact_asmt")
         # Velidate that data loaded into fact_Table after pipeline
         row_count = len(output_data)
         self.assertGreater(row_count, 1, "Data is loaded to star shema")
         truple_str = (self.guid_batch_id, )
         self.assertIn(truple_str, output_data, "assert successful")
 def validate_prepod_tables(self, schema_name):
     with get_target_connection(self.tenant, schema_name) as connection:
         fact_table = connection.get_table('fact_asmt_outcome_vw')
         dim_inst_hier = connection.get_table('dim_inst_hier')
         dim_student = connection.get_table('dim_student')
         dim_asmt = connection.get_table('dim_asmt')
         dim_tables = [dim_asmt, dim_student, dim_inst_hier]
         for table in dim_tables:
                 query = select([table.c.rec_status])
                 result = connection.execute(query).fetchall()
                 expected_status = [('S',)]
                 self.assertEqual(result, expected_status, "Error: In Dim_tables rec_status have not change to S")
         fact_table_data = select([fact_table.c.rec_status])
         fact_table_rows = connection.execute(fact_table_data).fetchall()
         expected_status = [('C',)]
         self.assertEquals(fact_table_rows, expected_status, "Error")
    def get_counts(self, schema_name):
        with get_target_connection(self.tenant_code, schema_name) as conn:
            fact_vw_select = select([func.count()]).select_from(conn.get_table('fact_asmt_outcome_vw'))
            fact_select = select([func.count()]).select_from(conn.get_table('fact_asmt_outcome'))
            asmt_selct = select([func.count()]).select_from(conn.get_table('dim_asmt'))
            inst_select = select([func.count()]).select_from(conn.get_table('dim_inst_hier'))
            stud_select = select([func.count()]).select_from(conn.get_table('dim_student'))
            sr_select = select([func.count()]).select_from(conn.get_table('student_reg'))

            fact_count = conn.execute(fact_vw_select).fetchall()[0][0]
            fact_primary_count = conn.execute(fact_select).fetchall()[0][0]
            asmt_count = conn.execute(asmt_selct).fetchall()[0][0]
            inst_count = conn.execute(inst_select).fetchall()[0][0]
            stud_count = conn.execute(stud_select).fetchall()[0][0]
            sr_count = conn.execute(sr_select).fetchall()[0][0]
        return fact_count, fact_primary_count, asmt_count, inst_count, stud_count, sr_count
def get_table_column_types(conf, target_table, column_names):
    '''
    Main function to get column types of a table by querying the table
    @return a dictionary, which has same ordered keys in the input column_names.
    The values are column types with maximum length if it is defined in table.
    The pattern of the value is: <column_name data_type(length)> or <column_name data_type>
    '''
    column_types = OrderedDict([(column_name, '') for column_name in column_names])
    tenant = conf[mk.TENANT_NAME]

    with get_target_connection(tenant, conf[mk.GUID_BATCH]) as conn:
        table = conn.get_table(target_table)
        for column in table.columns:
            if column.name in column_types.keys():
                column_types[column.name] = column.name + " " + str(column.type)
    return column_types
 def test_match_all_records_to_be_deleted_with_prod(self):
     with get_target_connection(
             MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch
     ) as target_conn, get_prod_connection(
             MatchAndDeleteFTest.tenant_code) as prod_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(
             self.conf, target_conn, 'fact_asmt_outcome_vw')
         proxy_rows = move_to_target.yield_records_to_be_deleted(
             prod_conn,
             'fact_asmt_outcome_vw',
             records_to_be_deleted,
             batch_size=10)
         result = []
         for rows in proxy_rows:
             result.extend(rows)
         self.assertEqual(4, len(result))
 def test_update_pre_prod_for_records_to_be_deleted(self):
     with get_target_connection(
             MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch
     ) as target_conn, get_prod_connection(
             MatchAndDeleteFTest.tenant_code) as prod_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(
             self.conf, target_conn, 'fact_asmt_outcome_vw')
         proxy_rows = move_to_target.yield_records_to_be_deleted(
             prod_conn,
             'fact_asmt_outcome_vw',
             records_to_be_deleted,
             batch_size=10)
         for rows in proxy_rows:
             move_to_target.update_rec_id_for_records_to_delete(
                 self.conf, target_conn, 'fact_asmt_outcome_vw', rows)
         mismatches = move_to_target.get_records_marked_for_deletion(
             self.conf, target_conn, 'fact_asmt_outcome_vw')
         self.assertEqual(0, len(mismatches))
Ejemplo n.º 30
0
 def validate_prepod_tables(self, schema_name):
     with get_target_connection(self.tenant, schema_name) as connection:
         fact_table = connection.get_table('fact_asmt_outcome_vw')
         dim_inst_hier = connection.get_table('dim_inst_hier')
         dim_student = connection.get_table('dim_student')
         dim_asmt = connection.get_table('dim_asmt')
         dim_tables = [dim_asmt, dim_student, dim_inst_hier]
         for table in dim_tables:
             query = select([table.c.rec_status])
             result = connection.execute(query).fetchall()
             expected_status = [('S', )]
             self.assertEqual(
                 result, expected_status,
                 "Error: In Dim_tables rec_status have not change to S")
         fact_table_data = select([fact_table.c.rec_status])
         fact_table_rows = connection.execute(fact_table_data).fetchall()
         expected_status = [('C', )]
         self.assertEquals(fact_table_rows, expected_status, "Error")
Ejemplo n.º 31
0
def handle_updates_and_deletes(conf):
    """Main handler for updates and deletes


    The method grabs tables to be processed for updates and deletes (Fact tables)
    and runs process_records_to_be_deleted for each table

    :param conf: udl configuration object
    """
    with get_target_connection(
            conf[mk.TENANT_NAME],
            conf[mk.TARGET_DB_SCHEMA]) as target_conn, get_prod_connection(
                conf[mk.TENANT_NAME]) as prod_conn:
        tables = get_tables_starting_with(target_conn.get_metadata(),
                                          Constants.FACT_TABLES_PREFIX)
        for table_name in tables:
            process_records_to_be_deleted(conf, target_conn, prod_conn,
                                          table_name)
 def validate_edware_database(self):
     with get_target_connection('cat', self.guid_batch_id) as ed_connector:
         edware_table = ed_connector.get_table(FACT_TABLE)
         output = select([
             edware_table.c.batch_guid
         ]).where(edware_table.c.batch_guid == self.guid_batch_id)
         output_val = select([
             edware_table.c.student_rec_id
         ]).where(edware_table.c.batch_guid == self.guid_batch_id)
         output_data = ed_connector.execute(output).fetchall()
         output_data1 = ed_connector.execute(output_val).fetchall()
         # Velidate that student_rec_id not containing -1
         self.assertNotIn(-1, output_data1,
                          "Student rec id in not -1 in fact_asmt")
         # Velidate that data loaded into fact_Table after pipeline
         row_count = len(output_data)
         self.assertGreater(row_count, 1, "Data is loaded to star shema")
         truple_str = (self.guid_batch_id, )
         self.assertIn(truple_str, output_data, "assert successful")
 def validate_student_data(self, file_to_load):
     with get_target_connection('cat', self.batch_id) as conn:
         student = self.student_reg_files[file_to_load]['test_student']
         target_table = conn.get_table(Constants.SR_TARGET_TABLE)
         query = select([target_table.c.state_name, target_table.c.district_name, target_table.c.school_id,
                         target_table.c.sex, target_table.c.birthdate, target_table.c.dmg_eth_hsp,
                         target_table.c.dmg_prg_504, target_table.c.academic_year, target_table.c.reg_system_id],
                        and_(target_table.c.student_id == student['student_id'], target_table.c.batch_guid == self.batch_id))
         result = conn.execute(query).fetchall()
         student_data_tuple = result[0]
         self.assertEquals(student_data_tuple[0], student['state_name_col'], 'State Name did not match')
         self.assertEquals(student_data_tuple[1], student['district_name_col'], 'District Name did not match')
         self.assertEquals(student_data_tuple[2], student['school_id_col'], 'School Id did not match')
         self.assertEquals(student_data_tuple[3], student['sex_col'], 'Sex did not match')
         self.assertEquals(student_data_tuple[4], datetime.strptime(student['dob_col'], "%Y-%m-%d").strftime("%Y%m%d"), 'Date of Birth did not match')
         self.assertEquals(student_data_tuple[5], student['eth_hsp_col'], 'Hispanic Ethnicity should be true')
         self.assertEquals(student_data_tuple[6], student['sec504_col'], 'Section504 status should be false')
         self.assertEquals(student_data_tuple[7], student['year_col'], 'Academic Year did not match')
         self.assertEquals(student_data_tuple[8], student['reg_sys_id_col'], 'Test registration system\'s id did not match')
 def validate_udl_stats(self):
     with StatsDBConnection() as conn:
         table = conn.get_table('udl_stats')
         query = select([table.c.load_status])
         result = conn.execute(query).fetchall()
         expected_result = [('migrate.ingested',), ('migrate.failed',)]
         # Validate that one migration batch is failed and one is successful.
         self.assertEquals(result, expected_result)
         # Validate that schema clean up is successful for both the scenario: Migration.ingested and migration.failed
         schema_name = select([table.c.batch_guid])
         schema_name_result = conn.execute(schema_name).fetchall()
         tpl_schema_name_1 = schema_name_result[0]
         tpl_schema_name_2 = schema_name_result[1]
         schema_name_1 = tpl_schema_name_1[0]
         schema_name_2 = tpl_schema_name_2[0]
         with get_target_connection(self.tenant) as connector:
                 query = select(['schema_name'], from_obj=['information_schema.schemata']).where('schema_name in :a')
                 params = [bindparam('a', (schema_name_1, schema_name_2))]
                 new_query = text(str(query), bindparams=params)
                 result = connector.execute(new_query).fetchall()
                 self.assertEqual(len(result), 0, "Schema clean up after migration is unsuccessful")
    def validate_edware_database(self, schema_name):
        with get_target_connection(self.tenant, schema_name) as ed_connector:
            fact_table_vw = ed_connector.get_table('fact_asmt_outcome_vw')
            prod_output_data = select([fact_table_vw.c.rec_status]).where(
                fact_table_vw.c.student_id ==
                '3efe8485-9c16-4381-ab78-692353104cce', )
            prod_output_table = ed_connector.execute(
                prod_output_data).fetchall()
            expected_status_val_D = [('D', )]
            self.assertEquals(
                prod_output_table, expected_status_val_D,
                'Status is wrong in fact table for delete record')

            fact_table = ed_connector.get_table('fact_asmt_outcome')
            prod_data = select([fact_table.c.rec_status]).where(
                fact_table.c.student_id ==
                '3efe8485-9c16-4381-ab78-692353104cce', )
            prod_table = ed_connector.execute(prod_data).fetchall()
            self.assertEquals(
                prod_table, expected_status_val_D,
                'Status is wrong in fact_asmt_table_primary for delete record')
Ejemplo n.º 36
0
    def get_star_schema_demographic_counts(self, tenant, schema):
        demographics = ['dmg_eth_hsp', 'dmg_eth_ami', 'dmg_eth_asn', 'dmg_eth_blk', 'dmg_eth_pcf', 'dmg_eth_wht', 'dmg_eth_2om',
                        'dmg_prg_iep', 'dmg_prg_lep', 'dmg_prg_504', 'dmg_sts_ecd', 'dmg_sts_mig']
        results_dict = {}
        with get_target_connection(tenant, schema) as conn:
            fact = conn.get_table('fact_asmt_outcome_vw')
            for entry in demographics:
                demo_query = select([func.count(fact.c[entry])], from_obj=fact).where(fact.c[entry] == true())
                result = conn.execute(demo_query)
                for row in result:
                    demo_count = row[0]

                results_dict[entry] = demo_count

                #get derived ethnicity
                eth_query = select([func.count(fact.c.dmg_eth_derived)], from_obj=fact).where(fact.c.dmg_eth_derived is not None)
                result = conn.execute(eth_query)
                for row in result:
                    derived_count = row[0]
                results_dict['dmg_eth_derived'] = derived_count

        return results_dict
Ejemplo n.º 37
0
def move_data_from_int_tables_to_target_table(conf, task_name, source_tables, target_table):
    """Move student registration data from source integration tables to target table.
    Source tables are INT_STU_REG and INT_STU_REG_META. Target table is student_registration.

    @param conf: Configuration for particular load type (assessment or studentregistration)
    @param task_name: Name of the celery task invoking this method
    @param source_tables: Names of the source tables from where the data comes
    @param target_table: Name of the target table to where the data should be moved

    @return: Number of inserted rows
    """
    column_and_type_mapping = get_column_and_type_mapping(conf, task_name, target_table, source_tables)

    with get_target_connection(conf[mk.TENANT_NAME], conf[mk.GUID_BATCH]) as conn_to_target_db:
        insert_query = create_sr_table_select_insert_query(conf, target_table, column_and_type_mapping)
        logger.info(insert_query)
        affected_rows = execute_udl_queries(conn_to_target_db, [insert_query],
                                            'Exception -- moving data from integration {int_table} to target {target_table}'
                                            .format(int_table=source_tables[0], target_table=target_table),
                                            'move_to_target', 'move_data_from_int_tables_to_target_table')

    return affected_rows
    def validate_edware_database(self, schema_name):
        with get_target_connection(self.tenant, schema_name) as ed_connector:
            fact_table = ed_connector.get_table('fact_asmt_outcome_vw')
            pre_prod_data = select([fact_table.c.student_rec_id, fact_table.c.inst_hier_rec_id, fact_table.c.asmt_rec_id])
            pre_prod_table = ed_connector.execute(pre_prod_data).fetchall()
            self.expected_pk_val_for_student = pre_prod_table[0][0]
            self.expected_pk_val_for_inst = pre_prod_table[0][1]
            self.expected_pk_val_for_asmt = pre_prod_table[0][2]

            lean_table = ed_connector.get_table('fact_asmt_outcome')
            pre_prod_data = select([lean_table.c.student_rec_id, fact_table.c.inst_hier_rec_id, fact_table.c.asmt_rec_id])
            pre_prod_table = ed_connector.execute(pre_prod_data).fetchall()
            self.lean_pk_val_for_student = pre_prod_table[0][0]
            self.lean_pk_val_for_inst = pre_prod_table[0][1]
            self.lean_pk_val_for_asmt = pre_prod_table[0][2]

            dim_student = ed_connector.get_table('dim_student')
            dim_student_data = select([dim_student.c.student_rec_id])
            result_student_id = ed_connector.execute(dim_student_data).fetchall()
            self.expected_student_id = result_student_id[0][0]

            dim_inst_hier = ed_connector.get_table('dim_inst_hier')
            dim_inst_hier_data = select([dim_inst_hier.c.inst_hier_rec_id])
            dim_inst_hier_result = ed_connector.execute(dim_inst_hier_data).fetchall()
            self.expected_inst_hier_rec_id = dim_inst_hier_result[0][0]

            dim_asmt = ed_connector.get_table('dim_asmt')
            dim_asmt_data = select([dim_asmt.c.asmt_rec_id])
            dim_asmt_result = ed_connector.execute(dim_asmt_data).fetchall()
            self.expected_asmt_rec_id = dim_asmt_result[0][0]

            self.assertEqual(self.expected_student_id, self.expected_pk_val_for_student, "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(self.expected_inst_hier_rec_id, self.expected_pk_val_for_inst, "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(self.expected_asmt_rec_id, self.expected_pk_val_for_asmt, "Error:student_rec_id of dim table not maching to fact table")

            self.assertEqual(self.expected_student_id, self.lean_pk_val_for_student, "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(self.expected_inst_hier_rec_id, self.lean_pk_val_for_inst, "Error:student_rec_id of dim table not maching to fact table")
            self.assertEqual(self.expected_asmt_rec_id, self.lean_pk_val_for_asmt, "Error:student_rec_id of dim table not maching to fact table")
Ejemplo n.º 39
0
def get_asmt_rec_id(guid_batch, tenant_name, asmt_rec_id_info):
    '''
    Returns asmt_rec_id from dim_asmt table
    Steps:
    1. Get guid_asmt from integration table INT_SBAC_ASMT
    2. Select asmt_rec_id from dim_asmt by the same guid_amst got from 1. It should have 1 value
    '''
    source_table_name = asmt_rec_id_info['source_table']
    guid_column_name_in_source = asmt_rec_id_info['guid_column_in_source']
    target_table_name = asmt_rec_id_info['target_table']
    guid_column_name_in_target = asmt_rec_id_info['guid_column_name']
    rec_id_column_name = asmt_rec_id_info['rec_id']

    # connect to integration table, to get the value of guid_asmt
    with get_udl_connection() as udl_conn:
        int_table = udl_conn.get_table(source_table_name)
        query = select([int_table.c[guid_column_name_in_source]],
                       from_obj=int_table,
                       limit=1)
        query = query.where(int_table.c['guid_batch'] == guid_batch)
        results = udl_conn.get_result(query)
        if results:
            guid_column_value = results[0][guid_column_name_in_source]

    # connect to target table, to get the value of asmt_rec_id
    with get_target_connection(tenant_name, guid_batch) as target_conn:
        dim_asmt = target_conn.get_table(target_table_name)
        query = select([dim_asmt.c[rec_id_column_name]],
                       from_obj=dim_asmt,
                       limit=1)
        query = query.where(
            dim_asmt.c[guid_column_name_in_target] == guid_column_value)
        query = query.where(and_(dim_asmt.c['batch_guid'] == guid_batch))
        results = target_conn.get_result(query)
        if results:
            asmt_rec_id = results[0][rec_id_column_name]

    return asmt_rec_id
Ejemplo n.º 40
0
    def get_edware_asmt_score_avgs(self, tenant, schema):
        with get_target_connection(tenant, schema) as conn:
            fact = conn.get_table('fact_asmt_outcome_vw')
            query = select([func.avg(fact.c.asmt_score),
                            func.avg(fact.c.asmt_score_range_min),
                            func.avg(fact.c.asmt_score_range_max),
                            func.avg(fact.c.asmt_claim_1_score),
                            func.avg(fact.c.asmt_claim_1_score_range_min),
                            func.avg(fact.c.asmt_claim_1_score_range_max),
                            func.avg(fact.c.asmt_claim_2_score),
                            func.avg(fact.c.asmt_claim_2_score_range_min),
                            func.avg(fact.c.asmt_claim_2_score_range_max),
                            func.avg(fact.c.asmt_claim_3_score),
                            func.avg(fact.c.asmt_claim_3_score_range_min),
                            func.avg(fact.c.asmt_claim_3_score_range_max),
                            func.avg(fact.c.asmt_claim_4_score),
                            func.avg(fact.c.asmt_claim_4_score_range_min),
                            func.avg(fact.c.asmt_claim_4_score_range_max)], from_obj=fact)
            result = conn.execute(query)
            for row in result:
                star_asmt_avgs = row

            return star_asmt_avgs
    def validate_edware_database(self, schema_name):
        with get_target_connection('cat', schema_name) as ed_connector:
            fact_table = ed_connector.get_table('fact_asmt_outcome_vw')
            delete_output_data = select([fact_table.c.rec_status]).where(fact_table.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce')
            delete_output_table = ed_connector.execute(delete_output_data).fetchall()
            expected_status_val_D = [('D',)]
            #verify delete record
            self.assertEquals(delete_output_table, expected_status_val_D, 'Status is wrong in fact table for delete record')
            #Verify Update record
            update_output_data = select([fact_table.c.rec_status]).where(fact_table.c.student_id == '34b99412-fd5b-48f0-8ce8-f8ca3788634a')
            update_output_table = ed_connector.execute(update_output_data).fetchall()
            self.assertIn(('D',), update_output_table, "Delete status D is not found in the Update record")
            self.assertIn(('C',), update_output_table, "Insert status C is not found in the Update record")

            # Validate that upadte of asmt_score(1509 to 1500) is successful for student with student_id =779e658d-de44-4c9e-ac97-ea366722a94c
            update_asmt_score = select([fact_table.c.asmt_score], and_(fact_table.c.student_id == '34b99412-fd5b-48f0-8ce8-f8ca3788634a', fact_table.c.rec_status == 'C'))
            new_asmt_score = ed_connector.execute(update_asmt_score).fetchall()
            expected_asmt_score = [(1500,)]
            self.assertEquals(new_asmt_score, expected_asmt_score)

            # Validate that delete and update also works for fact_asmt_outcome
            fact_asmt = ed_connector.get_table('fact_asmt_outcome')
            output_data = select([fact_asmt.c.rec_status]).where(fact_asmt.c.student_id == '3efe8485-9c16-4381-ab78-692353104cce')
            output_table = ed_connector.execute(output_data).fetchall()
            #verify delete record
            self.assertEquals(output_table, expected_status_val_D, 'Status is wrong in fact table for delete record')
            #Verify Update record
            update_data = select([fact_asmt.c.rec_status]).where(fact_asmt.c.student_id == '34b99412-fd5b-48f0-8ce8-f8ca3788634a')
            update_table = ed_connector.execute(update_data).fetchall()
            self.assertIn(('D',), update_table, "Delete status D is not found in the Update record")
            self.assertIn(('C',), update_table, "Insert status C is not found in the Update record")

            # Validate that upadte of asmt_score(1509 to 1500) is successful for student with student_id =779e658d-de44-4c9e-ac97-ea366722a94c
            update_score = select([fact_asmt.c.asmt_score], and_(fact_asmt.c.student_id == '34b99412-fd5b-48f0-8ce8-f8ca3788634a', fact_asmt.c.rec_status == 'C'))
            new_score = ed_connector.execute(update_score).fetchall()
            self.assertEquals(new_score, expected_asmt_score)
 def test_check_mismatched_deletions(self):
     with get_target_connection(self.tenant_code, self.guid_batch) as conn:
         self.assertRaises(DeleteRecordNotFound, move_to_target.check_mismatched_deletions(self.conf, conn, 'fact_asmt_outcome_vw'))
Ejemplo n.º 43
0
def drop_target_schema(tenant, schema_name):
    with get_target_connection(tenant) as connector:
        try:
            connector.execute(DropSchema(schema_name, cascade=True))
        except:
            pass
Ejemplo n.º 44
0
 def truncate_edware_tables(self, schema, tenant='cat'):
     with get_target_connection(tenant, schema_name=schema) as conn:
         metadata = conn.get_metadata()
         for table in reversed(metadata.sorted_tables):
             conn.execute(table.delete())
    def validate_edware_database(self, schema_name):
        with get_target_connection('cat', schema_name) as ed_connector:
            fact_table = ed_connector.get_table('fact_asmt_outcome_vw')
            delete_output_data = select([
                fact_table.c.rec_status
            ]).where(fact_table.c.student_id ==
                     '3efe8485-9c16-4381-ab78-692353104cce')
            delete_output_table = ed_connector.execute(
                delete_output_data).fetchall()
            expected_status_val_D = [('D', )]
            #verify delete record
            self.assertEquals(
                delete_output_table, expected_status_val_D,
                'Status is wrong in fact table for delete record')
            #Verify Update record
            update_output_data = select([
                fact_table.c.rec_status
            ]).where(fact_table.c.student_id ==
                     '34b99412-fd5b-48f0-8ce8-f8ca3788634a')
            update_output_table = ed_connector.execute(
                update_output_data).fetchall()
            self.assertIn(('D', ), update_output_table,
                          "Delete status D is not found in the Update record")
            self.assertIn(('C', ), update_output_table,
                          "Insert status C is not found in the Update record")

            # Validate that upadte of asmt_score(1509 to 1500) is successful for student with student_id =779e658d-de44-4c9e-ac97-ea366722a94c
            update_asmt_score = select(
                [fact_table.c.asmt_score],
                and_(
                    fact_table.c.student_id ==
                    '34b99412-fd5b-48f0-8ce8-f8ca3788634a',
                    fact_table.c.rec_status == 'C'))
            new_asmt_score = ed_connector.execute(update_asmt_score).fetchall()
            expected_asmt_score = [(1500, )]
            self.assertEquals(new_asmt_score, expected_asmt_score)

            # Validate that delete and update also works for fact_asmt_outcome
            fact_asmt = ed_connector.get_table('fact_asmt_outcome')
            output_data = select([
                fact_asmt.c.rec_status
            ]).where(fact_asmt.c.student_id ==
                     '3efe8485-9c16-4381-ab78-692353104cce')
            output_table = ed_connector.execute(output_data).fetchall()
            #verify delete record
            self.assertEquals(
                output_table, expected_status_val_D,
                'Status is wrong in fact table for delete record')
            #Verify Update record
            update_data = select([
                fact_asmt.c.rec_status
            ]).where(fact_asmt.c.student_id ==
                     '34b99412-fd5b-48f0-8ce8-f8ca3788634a')
            update_table = ed_connector.execute(update_data).fetchall()
            self.assertIn(('D', ), update_table,
                          "Delete status D is not found in the Update record")
            self.assertIn(('C', ), update_table,
                          "Insert status C is not found in the Update record")

            # Validate that upadte of asmt_score(1509 to 1500) is successful for student with student_id =779e658d-de44-4c9e-ac97-ea366722a94c
            update_score = select([fact_asmt.c.asmt_score],
                                  and_(
                                      fact_asmt.c.student_id ==
                                      '34b99412-fd5b-48f0-8ce8-f8ca3788634a',
                                      fact_asmt.c.rec_status == 'C'))
            new_score = ed_connector.execute(update_score).fetchall()
            self.assertEquals(new_score, expected_asmt_score)
 def validate_stu_reg_target_table(self, file_to_load):
     with get_target_connection('cat', self.batch_id) as conn:
         target_table = conn.get_table(Constants.SR_TARGET_TABLE)
         query = select([func.count()]).select_from(target_table)
         record_count = conn.execute(query).fetchall()[0][0]
         self.assertEqual(record_count, self.student_reg_files[file_to_load]['num_records_in_data_file'], 'Unexpected number of records in target table.')
 def verify_multi_tenancy(self):
     with get_target_connection('nc', self.guid_batch_id) as ed_connector:
         edware_table = ed_connector.get_table('dim_asmt')
         query_dim_table = select([edware_table])
         result_dim_table = ed_connector.execute(query_dim_table).fetchall()
         self.assertEquals(len(result_dim_table), 1, "Data not loaded into preprod")
Ejemplo n.º 48
0
def drop_target_schema(tenant, schema_name):
    with get_target_connection(tenant) as connector:
        try:
            connector.execute(DropSchema(schema_name, cascade=True))
        except:
            pass
 def test_get_records_for_deletion(self):
     self.load_int_to_star()
     self.assertEqual(4, self.count_rows())
     with get_target_connection(MatchAndDeleteFTest.tenant_code, MatchAndDeleteFTest.guid_batch) as target_conn:
         records_to_be_deleted = move_to_target.get_records_marked_for_deletion(self.conf, target_conn, 'fact_asmt_outcome_vw')
         self.assertEqual(4, len(records_to_be_deleted))
 def test_check_mismatched_deletions(self):
     with get_target_connection(self.tenant_code, self.guid_batch) as conn:
         self.assertRaises(
             DeleteRecordNotFound,
             move_to_target.check_mismatched_deletions(
                 self.conf, conn, 'fact_asmt_outcome_vw'))
Ejemplo n.º 51
0
 def drop_target_schema(cls, tenant, guid_batch):
     with get_target_connection(tenant) as connector:
         drop_schema(connector, guid_batch)
Ejemplo n.º 52
0
def create_target_schema_for_batch(tenant, schema_name):
    """
    creates the target star schema needed for this batch
    """
    with get_target_connection(tenant) as conn:
        create_schema(conn, generate_ed_metadata, schema_name)
Ejemplo n.º 53
0
def create_target_schema_for_batch(tenant, schema_name):
    """
    creates the target star schema needed for this batch
    """
    with get_target_connection(tenant) as conn:
        create_schema(conn, generate_ed_metadata, schema_name)
Ejemplo n.º 54
0
 def create_schema_for_target(cls, tenant, guid_batch):
     with get_target_connection(tenant) as connector:
         create_schema(connector, generate_ed_metadata, guid_batch)
Ejemplo n.º 55
0
 def create_schema_for_target(cls, tenant, guid_batch):
     with get_target_connection(tenant) as connector:
         create_schema(connector, generate_ed_metadata, guid_batch)
Ejemplo n.º 56
0
 def drop_target_schema(cls, tenant, guid_batch):
     with get_target_connection(tenant) as connector:
         drop_schema(connector, guid_batch)