Exemple #1
0
def complementary_merge(run_context, bookmap_location_file_name, bookmap_sheet,
                        mergespec_file_name, input_table_names,
                        output_table_names):
    db_context = run_context.getDBContext()
    temp_bookmap_table_name = dbutilities.get_temp_table(db_context).table_name
    BookMapReader(inputfile=bookmap_location_file_name,
                  inputsheet=bookmap_sheet,
                  read_to_db=True,
                  db_context=db_context,
                  outputTable=temp_bookmap_table_name)
    temp_prep_table_name = dbutilities.get_temp_table(db_context).table_name
    _comp_prepare1(run_context=run_context,
                   bookmaps_table_name=temp_bookmap_table_name,
                   input_table_names=input_table_names,
                   output_table_name=temp_prep_table_name)

    prepped_input_table_names = {}
    for key in input_table_names:
        if key == FLAT_TABLE_KEY_NAME:
            prepped_input_table_names[key] = temp_prep_table_name
        else:
            prepped_input_table_names[key] = input_table_names[key]

    cmpmrg = ComplementaryMerge(run_context=run_context)
    cmpmrg.identity_insert = False
    cmpmrg.complementary_merge(mergespec_file=mergespec_file_name,
                               input_table_names=prepped_input_table_names,
                               output_table_names=output_table_names)
 def read_oat_agg_sheet(self):
     print "Reading aggregation sheet"
     table = dbutilities.get_temp_table(self.db_context)
     data_file = os.path.join(self.data_dir, _AGG_FILE_OAT)
     reader = SafeExcelReader(self.run_context,
                              data_file,
                              _XLS_SHEET,
                              table,
                              self.db_context,
                              scan_all=True)
     reader.createTable()
     table.populate_from_connection()
     return table
    def do_17_18(self, tweak_agg_sheet_query):
        with self.read_g3() as g3, \
             self.read_oat_agg_sheet() as temp_agg_sheet, \
             dbutilities.get_temp_table(self.db_context) as agg_sheet:

            ## Keep first row of aggregation definition
            self.db_context.executeNoResults(
                "SELECT TOP(1) * INTO {agg_sheet} FROM {temp_table} WHERE [subject]='R' ORDER BY [import_order]"
                .format(agg_sheet=agg_sheet, temp_table=temp_agg_sheet))

            print "Running first ttest"
            ttester1 = TTest(g3, self.db_context, agg_sheet, None, False)
            ttester1.readAggData()
            ttester1.execute()
            results1 = []
            for level in ttester1.target_levels:
                results1.append(dbutilities.dump(level.output_table, level.id))

            print "Updating data sheet"
            self.db_context.executeNoResults(
                "DELETE FROM {g3} WHERE [inclusionflagr] IS NULL OR [inclusionflagr] != 1"
                .format(g3=g3))
            self.db_context.executeNoResults(
                tweak_agg_sheet_query.format(agg_sheet=agg_sheet))

            print "Running second ttest"
            ttester2 = TTest(g3, self.db_context, agg_sheet, None, False)
            ttester2.readAggData()
            ttester2.execute()
            results2 = []
            for level in ttester2.target_levels:
                results2.append(dbutilities.dump(level.output_table, level.id))

            print "Comparing ttest outputs"
            assert len(results1) >= 1
            assert len(results1) == len(results2)
            for i in range(len(results1)):
                res1 = results1[i]
                res2 = results2[i]
                keys = res1[0].keys()

                assert len(res1) == len(res2)
                for j in range(len(res2)):
                    row1 = res1[j]
                    row2 = res2[j]
                    for k in keys:
                        assert row1[k] == row2[k]
Exemple #4
0
def init_temp_tables( db_context ):
    temp_tables = { }
    if not db_context:
        raise ValueError( 'db_context is null or empty.' )
    temp_tables[ 'counts' ] = dbutilities.get_temp_table( db_context ).table_name
    temp_tables[ 'result' ] = dbutilities.get_temp_table( db_context ).table_name
    temp_tables[ 'resultc' ] = dbutilities.get_temp_table( db_context ).table_name
    temp_tables[ 'total' ] = dbutilities.get_temp_table( db_context ).table_name
    temp_tables[ 'totalc' ] = dbutilities.get_temp_table( db_context ).table_name
    temp_tables[ 'allfive' ] = dbutilities.get_temp_table( db_context ).table_name
    return temp_tables
    def read_g3(self):
        print "Reading data"
        table = dbutilities.get_temp_table(self.db_context)
        data_file = os.path.join(self.data_dir, _XLS_FILE_G3)
        reader = SafeExcelReader(self.run_context,
                                 data_file,
                                 _XLS_SHEET,
                                 table,
                                 self.db_context,
                                 scan_all=False)
        reader.createTable()

        print "Tweaking data"
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [state_inc_flag] = CASE WHEN ( [schtype] IN ('N','D','H') ) THEN 0 ELSE 1 END"
            .format(g3=table))
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [Rclass_missing_flag] = CASE WHEN ( [Rclass_id] IS NULL OR [Rclass_id]='' ) THEN 1 ELSE 0 END"
            .format(g3=table))
        table.populate_from_connection()
        return table
    def test_21(self):
        answer_dir = os.path.join(self.answer_dir, 'test_21')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_21')
        result = True

        with self.read_g3() as g3, \
             self.read_oat_agg_sheet() as agg_sheet, \
             dbutilities.get_temp_table( self.db_context ) as tmp, \
             dbutilities.get_temp_table( self.db_context ) as tmp_agg :

            # As near as I can tell, the SAS test only runs for the tenth row of the agg sheet.
            # self.db_context.executeNoResults("DELETE FROM {agg_sheet} WHERE [import_order] != 10".format( agg_sheet=agg_sheet ))

            # We are just using this TTest instance to read the aggregation sheet. The actual ttest will use
            # another instance based on a truncated aggregation sheet.
            agg_sheet_reader = TTest(g3, self.db_context, agg_sheet, None,
                                     False)
            agg_sheet_reader.readAggData()

            assert dbutilities.table_exists(g3)

            targetParentRow = []
            for target_level in agg_sheet_reader.target_levels:
                for parent_level in target_level.contents:
                    for row in parent_level.contents:
                        targetParentRow.append(
                            (target_level, parent_level, row))

            targetParentRow.sort(key=lambda (row): row[2].import_order)

            for target_level, parent_level, row in targetParentRow:
                where_t = target_level.get_where_expression()
                target_id = target_level.id
                where_p = parent_level.get_where_expression()
                parent_id = parent_level.id
                i = row.import_order

                # Reduce the data to the desired sample
                dbutilities.drop_table_if_exists(tmp)
                query = """
                SELECT {vars},
                        COUNT( {input_var} ) OVER( PARTITION BY {parent_id}, {target_id} ) AS n_target,
                        0 AS n_parent
                INTO {tmp}
                FROM {g3}
                WHERE {where_t}
                """.format(parent_id=parent_id,
                           target_id=target_id,
                           input_var=row.inputvar,
                           where_t=where_t,
                           tmp=tmp,
                           g3=g3,
                           vars=Joiner(g3))
                self.db_context.executeNoResults(query)
                query = """
                UPDATE {tmp} SET n_parent = A.B FROM (
                    SELECT n_parent, COUNT( {input_var} ) OVER( PARTITION BY {parent_id} ) AS B
                    FROM {tmp}
                    WHERE {where_p}
                ) AS A
                """.format(parent_id=parent_id,
                           input_var=row.inputvar,
                           where_p=where_p,
                           tmp=tmp)
                print query
                self.db_context.executeNoResults(query)
                query = "DELETE FROM {tmp} WHERE ( n_parent != 2 ) OR ( n_target != 1 )".format(
                    tmp=tmp)
                self.db_context.executeNoResults(query)
                n_obs = dbutilities.n_obs(tmp)
                if n_obs > 0:

                    # Reduce the aggregation sheet to the current row
                    query = "SELECT * INTO {tmp_agg} FROM {agg_sheet} WHERE [import_order]={i}".format(
                        tmp_agg=tmp_agg, agg_sheet=agg_sheet, i=i)
                    self.db_context.executeNoResults(query)

                    # Do the ttest
                    ttester = TTest(tmp, self.db_context, tmp_agg, None, False)
                    ttester.readAggData()
                    ttester.execute()

                    # Check the answer
                    answer_file = os.path.join(
                        answer_dir, 'row_{0}_comparison.log'.format(i))
                    specimen_file = os.path.join(
                        specimen_dir, 'test_21_ttest_{0}.xls'.format(i))
                    result_i = self.compare_output(specimen_file, target_level,
                                                   answer_file)
                    result = result and result_i
                    print "{1} ttest test_21 for {0}".format(
                        i, 'PASSED' if result_i else 'FAILED')

                    self.assertTrue(result, "TTest Test 21 FAILED")
                    return