Example #1
0
 def test_valid_mergespec_file_output_format(self):
     """Verify create_mergespec_file() output matches the stored specimen.

     Loads the valid input spreadsheet into a fresh DB table, generates a
     mergespec file from it, and byte-compares the result against the
     expected file in the safe test directory.
     """
     self._log.info(
         'test_valid_mergespec_file_output_format() - testing...')
     valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                        DS_INPUT_FILE_VALID)
     input_tables = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     # Start from a clean slate so createTable() does not collide with a
     # table left over from an earlier run.
     dbutilities.drop_table_if_exists(
         db_context=self._run_context.getDBContext(),
         table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader = SafeExcelReader(
         run_context=self._run_context,
         filename=valid_ds_input_file,
         sheet_name=DS_INPUT_FILE_SHEET_VALID,
         db_context=self._run_context.getDBContext(),
         output_table=VALID_DS_INPUT_TABLE_NAME)
     input_table_reader.createTable()
     # NOTE(review): drive-root path is Windows-only and may need elevated
     # permissions -- consider tempfile.gettempdir() instead.
     mergespec_file = 'C:/{}'.format(MERGESPEC_OUTPUT_FILE)
     if os.path.exists(mergespec_file):
         os.remove(mergespec_file)
     cmpmrg = ComplementaryMerge(run_context=self._run_context)
     cmpmrg.create_mergespec_file(input_table_names=input_tables,
                                  new_mergespec_file=mergespec_file)
     filename1 = mergespec_file
     filename2 = os.path.join(self._run_context.tests_safe_dir,
                              COMPARE_FILE_MERGESPEC_OUTPUT)
     assert filecmp.cmp(
         filename1,
         filename2), 'Python test output does not match expected output.'
     # Fixed: completion is logged only after the comparison has passed
     # (it was previously logged before the assert).
     self._log.info('test_valid_mergespec_file_output_format() - tested.')
    def setUp(self):
        """Create suite/db contexts, output dirs, and static grade tables."""
        self.run_context = SuiteContext("unittest")
        self.db_context = self.run_context.getDBContext("unittest")
        self.static_context = self.run_context.getDBContext("static")

        self.answer_dir = os.path.join(self.run_context.logs_dir,
                                       'ttest_level_tests')
        if not os.path.exists(self.answer_dir):
            os.makedirs(self.answer_dir)

        self.data_dir = os.path.join(self.run_context.tests_safe_dir,
                                     'ttest', 'input_data')
        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir,
                                         'ttest', 'sas_outputs')

        # SAS equivalent of the aggregation-sheet read:
        #   %SafeExcelRead(filename=&agg_file., sheetname=&sheet., DS_out=aggds)
        self.run_context.debug("Reading data for ttest_level tests")
        agg_path = os.path.join(self.data_dir, _AGG_FILE)
        agg_reader = SafeExcelReader(self.run_context, agg_path, "ttestlevel",
                                     scan_all=True)
        self.agg_ds = list(agg_reader.getRows())

        # Import each grade-level input dataset once into the static
        # context; subsequent runs reuse the existing tables.
        agg_reader.db_context = self.static_context
        for grade, filename, table_name, sheet_name in _GRADE_FILES:
            if table_exists(table_name, self.static_context):
                continue
            self.run_context.debug("Reading data for grade {}".format(grade))
            agg_reader.filename = os.path.join(self.data_dir, filename)
            agg_reader.outputTable = table_name
            agg_reader.sheetName = sheet_name
            agg_reader.createTable()
    def test_10(self):
        """Run TTest over the class_info table and compare with SAS output.

        Imports the spreadsheet into 'class_info', executes the t-test, and
        compares the resulting table against the SAS specimen file keyed by
        (tcrxid, comp).
        """
        data_file = os.path.join(self.data_dir, _XLS_FILE)
        reader = SafeExcelReader(self.run_context,
                                 data_file,
                                 _XLS_SHEET,
                                 'class_info',
                                 self.db_context,
                                 scan_all=True)
        reader.createTable()
        ttester = TTest('class_info', self.db_context,
                        os.path.join(self.data_dir, _AGG_FILE), 0, True)
        ttester.readAggData()
        ttester.execute()

        answer_dir = os.path.join(self.answer_dir, 'test_10')
        if not os.path.exists(answer_dir):
            os.makedirs(answer_dir)
        answer_file = os.path.join(answer_dir, 'comparison.log')
        specimen_dir = os.path.join(self.specimen_dir, 'ttest_test_10')

        # PEP 8 (E731): a named function instead of a lambda assignment.
        def compare_key(row):
            # Key rows by (teacher id, comparison id) for matching.
            return (int(row.tcrxid), int(row.comp))

        result = compare_tables(answer_file,
                                table="ttest_class",
                                specimen_name=os.path.join(
                                    specimen_dir, 'testresult.XLS'),
                                columns=COLUMNS,
                                table_key_function=compare_key,
                                specimen_key_function=compare_key,
                                db_context=self.db_context)

        self.assertTrue(result, "TTest Test 10 FAILED")
Example #4
0
    def test_merge_nonfuzzy(self):
        """Run a non-fuzzy complementary merge and diff the dump vs SAS."""
        self._log.info('test_merge_nonfuzzy - testing...')
        safe_dir = self._run_context.tests_safe_dir
        mergespec_file = os.path.join(safe_dir, MERGESPEC_FILE_VALID_NONFUZZY)
        valid_ds_input_file = os.path.join(safe_dir, DS_INPUT_FILE_VALID)

        input_tables = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
        output_tables = {
            FLAT_TABLE_KEY_NAME: VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY
        }

        # Drop the input table and any outputs left from previous runs.
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_INPUT_TABLE_NAME)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY)
        for subject in output_tables:
            subject_table = VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY + '_' + subject
            self._log.info('dropping output table [{}]'.format(subject_table))
            dbutilities.drop_table_if_exists(
                db_context=self._run_context.getDBContext(),
                table=subject_table)

        # Import the source spreadsheet into the input table.
        SafeExcelReader(run_context=self._run_context,
                        filename=valid_ds_input_file,
                        sheet_name=DS_INPUT_FILE_SHEET_VALID,
                        db_context=self._run_context.getDBContext(),
                        output_table=VALID_DS_INPUT_TABLE_NAME).createTable()

        merger = ComplementaryMerge(
            run_context=self._run_context,
            flat_table_identity_field_name='[import_order]')
        merger.complementary_merge(mergespec_file=mergespec_file,
                                   input_table_names=input_tables,
                                   output_table_names=output_tables,
                                   force_one_only=True)

        columns_str = (
            'Barcode, ID, LastName, FirstName, Score1, Race, Old, Score2, Score3, Score4, variable_priority,'
            ' record_priority, Attempt1, Attempt2, Attempt3, Attempt4, iep')
        actual_file = self._dump_table(VALID_DS_OUTPUT_TABLE_NAME_NONFUZZY,
                                       columns_str=columns_str,
                                       sort_column_nbrs=[1, 6, 0])
        expected_file = os.path.join(safe_dir, COMPARE_FILE_NONFUZZY)
        assert filecmp.cmp(
            actual_file,
            expected_file), 'Python test output does not match SAS test output.'

        self._log.info('test_merge_nonfuzzy - tested.')
 def read_oat_agg_sheet(self):
     """Load the OAT aggregation sheet into a temp table and return it.

     Creates a temp table, imports the aggregation spreadsheet into it,
     and returns the populated temp-table spec.
     """
     # print() with a single argument behaves identically on Python 2 and
     # is required syntax on Python 3 (the original `print "..."` statement
     # is a SyntaxError there).
     print("Reading aggregation sheet")
     table = dbutilities.get_temp_table(self.db_context)
     data_file = os.path.join(self.data_dir, _AGG_FILE_OAT)
     reader = SafeExcelReader(self.run_context,
                              data_file,
                              _XLS_SHEET,
                              table,
                              self.db_context,
                              scan_all=True)
     reader.createTable()
     table.populate_from_connection()
     return table
    def setUpData(self):
        """Reload the Data1/Data2 sheets into fresh tables and cache specs."""
        clear_all(self.db_context)
        reader = SafeExcelReader(run_context=self.run_context,
                                 db_context=self.db_context,
                                 filename=os.path.join(
                                     self.run_context.tests_safe_dir,
                                     XLS_FILE),
                                 scan_all=True)
        # Each sheet is imported into a table of the same name.
        for sheet in ("Data1", "Data2"):
            reader.outputTable = reader.sheetName = sheet
            reader.createTable()

        self.data1 = self.db_context.getTableSpec("Data1")
        self.data2 = self.db_context.getTableSpec("Data2")
Example #7
0
 def process(self):
      
     """Run the layout check end-to-end and return its result dicts.

     Loads the layout spreadsheet into ``self.output_table`` via
     SafeExcelReader, then runs the check helpers in sequence: types,
     empty rows, invalid variables, valid ranges, and recodings.

     Returns:
         tuple: ``(self.layoutdict, self.maxmindict, self.recodingsdict)``.
         NOTE(review): presumably populated by the helper calls above --
         if a helper raised, these may be missing or partial; confirm.
     """
      
     SE = SafeExcelReader(self.RC, self.filename, self.sheet_name, self.output_table, db_context=self.dbcontext,get_names=True, delimiter=',', import_order='import_order')
     SE.createTable()
     try:
         self._checktypes()
         self._checkemptyrows()
         self._checkinvalidvariables()
         self._checkvalidranges()
         self._recodings_and_create_dicts()
     # NOTE(review): failures are swallowed after printing, so callers can
     # receive partially-built dicts -- consider re-raising instead.
     except Exception as error:
             print 'Module:LAYOUT_CHECK Error=',error
      
     return self.layoutdict, self.maxmindict, self.recodingsdict
Example #8
0
    def testConstructor(self):
        """Keyword-argument construction should yield a table named [temp1]."""
        source_path = os.path.join(self.testDataDir, _XLS_FILE)
        reader = SafeExcelReader(self.runContext,
                                 filename=source_path,
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()

        # Every table in the context must be the normalized '[temp1]'.
        for name in get_table_names(self.db_context):
            self.assertEqual(
                '[temp1]', name,
                "Found name '{name}' instead of '[temp1]'".format(name=name))
Example #9
0
    def test_mergespec_file_invalid_format(self):
        """Malformed mergespec files must raise EOFError / ValueError.

        Exercises three bad specs: a missing column (EOFError), an invalid
        column (ValueError), and a spec without a variable-priority column
        (ValueError).
        """
        self._log.info('test_mergespec_file_invalid_format() - testing...')
        valid_ds_input_file = os.path.join(self._run_context.tests_safe_dir,
                                           DS_INPUT_FILE_VALID)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_INPUT_TABLE_NAME)
        dbutilities.drop_table_if_exists(
            db_context=self._run_context.getDBContext(),
            table=VALID_DS_OUTPUT_TABLE_NAME)
        input_table_reader = SafeExcelReader(
            run_context=self._run_context,
            filename=valid_ds_input_file,
            sheet_name=DS_INPUT_FILE_SHEET_VALID,
            db_context=self._run_context.getDBContext(),
            output_table=VALID_DS_INPUT_TABLE_NAME)
        input_table_reader.createTable()
        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_MISSING_COLUMN)
        input_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
        output_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_OUTPUT_TABLE_NAME}
        cmpmrg1 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(EOFError):
            cmpmrg1.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)

        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_INVALID_COLUMN)
        cmpmrg2 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(ValueError):
            cmpmrg2.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)

        # Fixed: a copy-pasted "tested." log line used to appear here,
        # before the final scenario had run; the third merger also gets a
        # distinct name (was a reused `cmpmrg2`).
        mergespec_file = os.path.join(self._run_context.tests_safe_dir,
                                      MERGESPEC_FILE_NO_VARIABLE_PRIORITY)
        cmpmrg3 = ComplementaryMerge(run_context=self._run_context)
        with self.assertRaises(ValueError):
            cmpmrg3.complementary_merge(mergespec_file=mergespec_file,
                                        input_table_names=input_table_names,
                                        output_table_names=output_table_names,
                                        force_one_only=True)
        self._log.info('test_mergespec_file_invalid_format() - tested.')
    def read_g3(self):
        """Load the G3 spreadsheet into a temp table and derive two flags.

        After import, sets [state_inc_flag] (0 for school types N/D/H,
        else 1) and [Rclass_missing_flag] (1 when [Rclass_id] is NULL or
        empty), then returns the populated temp-table spec.
        """
        # print() with a single argument behaves identically on Python 2
        # and is required syntax on Python 3 (the original `print "..."`
        # statement is a SyntaxError there).
        print("Reading data")
        table = dbutilities.get_temp_table(self.db_context)
        data_file = os.path.join(self.data_dir, _XLS_FILE_G3)
        reader = SafeExcelReader(self.run_context,
                                 data_file,
                                 _XLS_SHEET,
                                 table,
                                 self.db_context,
                                 scan_all=False)
        reader.createTable()

        print("Tweaking data")
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [state_inc_flag] = CASE WHEN ( [schtype] IN ('N','D','H') ) THEN 0 ELSE 1 END"
            .format(g3=table))
        self.db_context.executeNoResults(
            "UPDATE {g3} SET [Rclass_missing_flag] = CASE WHEN ( [Rclass_id] IS NULL OR [Rclass_id]='' ) THEN 1 ELSE 0 END"
            .format(g3=table))
        table.populate_from_connection()
        return table
Example #11
0
 def test_ds_output_already_exists(self):
     """complementary_merge must raise IOError when the output table exists."""
     self._log.info('test_ds_output_already_exists() - testing...')
     safe_dir = self._run_context.tests_safe_dir
     mergespec_file = os.path.join(safe_dir, MERGESPEC_FILE_VALID)
     valid_ds_input_file = os.path.join(safe_dir, DS_INPUT_FILE_VALID)
     dbutilities.drop_table_if_exists(
         db_context=self._run_context.getDBContext(),
         table=VALID_DS_INPUT_TABLE_NAME)
     SafeExcelReader(run_context=self._run_context,
                     filename=valid_ds_input_file,
                     sheet_name=DS_INPUT_FILE_SHEET_VALID,
                     db_context=self._run_context.getDBContext(),
                     output_table=VALID_DS_INPUT_TABLE_NAME).createTable()
     # Deliberately point the output at the already-created input table.
     input_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     output_table_names = {FLAT_TABLE_KEY_NAME: VALID_DS_INPUT_TABLE_NAME}
     merger = ComplementaryMerge(run_context=self._run_context)
     with self.assertRaises(IOError):
         merger.complementary_merge(mergespec_file=mergespec_file,
                                    input_table_names=input_table_names,
                                    output_table_names=output_table_names)
     self._log.info('test_ds_output_already_exists() - tested.')
    def setUp(self):
        """Create contexts, ensure student_g3 is loaded, and set up dirs."""
        self.run_context = SuiteContext("unittest")
        self.db_context = self.run_context.getDBContext("unittest")
        self.static_context = self.run_context.getDBContext("static")
        self.source_data_dir = self.run_context.getConfigFile(
            "TESTS", "id_generator_test_source_data_dir",
            "%(tests_safe_dir)s/id_generator_test/source_data")

        # Import the grade-3 student sheet only once; later runs reuse it.
        if not table_exists('student_g3', self.static_context):
            SafeExcelReader(self.run_context,
                            os.path.join(self.source_data_dir, _XLS_FILE),
                            "Sheet1",
                            'student_g3',
                            self.static_context,
                            scan_all=True).createTable()

        self.answer_dir = os.path.join(self.run_context.logs_dir,
                                       'id_generator_test')
        if not os.path.exists(self.answer_dir):
            os.makedirs(self.answer_dir)

        self.specimen_dir = os.path.join(self.run_context.tests_safe_dir,
                                         'id_generator_test', 'sas_outputs')
Example #13
0
class Test(unittest.TestCase):
    """Tests for table-utility helpers: tablespec resolution, name listing,
    existence checks, and drop/clear operations.

    Fixes applied: the deprecated ``assertEquals`` alias (removed in
    Python 3.12) is replaced by ``assertEqual``, and try/except/fail
    boilerplate is replaced by ``assertRaises`` context managers.
    """

    def setUp(self):
        """Reset the unittest DB and import the Data1 sheet as [data1]."""
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

    def test_n_obs(self):
        n = n_obs('data1', self.db_context)
        self.assertEqual(
            300, n,
            "Got wrong number of observations: {} instead of 300".format(n))

    def test_get_tablespec_1(self):
        '''Does a tablespec get returned as is
        '''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_2(self):
        '''Does it work to specify table name and db_context?
        '''
        t = get_table_spec('some_table', self.db_context)
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEqual(t.table_name, '[some_table]',
                         "Did not return the right name")
        self.assertEqual(t.table_schema, self.db_context.schema,
                         "Did not return the correct schema")

    def test_get_tablespec_3(self):
        '''Can we override the schema?
        '''
        t = get_table_spec('some_table', self.db_context, 'another_schema')
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEqual(t.table_name, '[some_table]',
                         "Did not return the right name")
        self.assertEqual(t.table_schema, '[another_schema]',
                         "Did not return the correct schema")

    def test_get_tablespec_4(self):
        '''Throw error if db_context conflicts
        '''
        another_context = self.runContext.getDBContext()
        t1 = self.db_context.getTableSpec("Data1")
        with self.assertRaises(ValueError):
            get_table_spec(t1, another_context)

    def test_get_tablespec_5(self):
        '''Throw error if schema conflicts
        '''
        t1 = self.db_context.getTableSpec("Data1")
        with self.assertRaises(ValueError):
            get_table_spec(t1, self.db_context, 'another_schema')

    def test_get_tablespec_6(self):
        '''Correctly normalizing case
        '''
        t = get_table_spec('some_TABLE', self.db_context, 'another_SCHEMA')
        self.assertTrue(isinstance(t, TableSpec),
                        "Did not return a TableSpec object")
        self.assertEqual(t.table_name, '[some_table]',
                         "Did not return the right name")
        self.assertEqual(t.table_schema, '[another_schema]',
                         "Did not return the correct schema")

    def test_get_tablespec_7(self):
        '''Throws error if no db_context specified
        '''
        with self.assertRaises(ValueError):
            get_table_spec('some_table')

    def test_get_tablespec_8(self):
        '''No error as long as db context matches
        '''
        t = self.db_context.getTableSpec("Data1")
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_tablespec_9(self):
        '''No error as long as schema matches
        '''
        t = self.db_context.getTableSpec("Data1",
                                         table_schema=self.db_context.schema)
        t2 = get_table_spec(t)
        self.assertTrue(t is t2, "Did not return the same tablespec")

    def test_get_column_names(self):
        cols = get_column_names('data1', self.db_context)
        self.assertListEqual(cols, [
            '[barcode_num]', '[barcode_char]', '[studentid]', '[gender]',
            '[ethnicity]', '[studentlnm]', '[studentfnm]', '[num_1]',
            '[num_2]', '[char_1]', '[char_2]', '[n1]', '[n2]', '[import_order]'
        ], "Did not return correct column names")

    def test_get_table_names1(self):
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_get_table_names2(self):
        '''Should not return any tables from another schema
        '''
        tables = get_table_names(self.db_context,
                                 table_schema="another_schema")
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_get_table_names3(self):
        '''Should work with quoted schema name
        '''
        tables = get_table_names(self.db_context, table_schema="[dbo]")
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_get_table_names4(self):
        '''Should work with unquoted schema name
        '''
        tables = get_table_names(self.db_context, table_schema="dbo")
        self.assertListEqual(tables, ['[data1]'],
                             "Did not return correct table names")

    def test_clear_all(self):
        '''clear_all must drop tables even when FK constraints exist
        '''
        # Create a table with a foreign key constraint
        query = "CREATE TABLE my_table( key1 bigint, var1 VARCHAR(17), FOREIGN KEY( key1 ) REFERENCES data1( import_order ) )"
        self.db_context.executeNoResults(query)
        tables = get_table_names(self.db_context)
        self.assertTrue('[data1]' in tables,
                        "Did not return correct table names")
        self.assertTrue('[my_table]' in tables,
                        "Did not return correct table names")
        self.assertEqual(len(tables), 2, "Did not return correct table names")
        clear_all(self.db_context)
        tables = get_table_names(self.db_context)
        self.assertListEqual(tables, [], "Did not return correct table names")

    def test_table_exists(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that exists")
        self.assertFalse(table_exists('zaxxiz', self.db_context),
                         "Found table that does not exist")

    def test_drop_table_if_exists1(self):
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")
        t = self.db_context.getTableSpec('data1')
        drop_table_if_exists(t)
        self.assertFalse(
            table_exists(t),
            "Table was supposed to be dropped, but its still there")

    def test_drop_table_if_exists2(self):
        '''Different schema
        '''
        t = self.db_context.getTableSpec('data1')
        t.table_schema = 'not_my_schema'
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_drop_table_if_exists3(self):
        '''Different name
        '''
        t = self.db_context.getTableSpec('data2')
        drop_table_if_exists(t)
        self.assertTrue(table_exists('data1', self.db_context),
                        "Failed to find table that was supposed to exist")

    def test_assembly_exists(self):
        '''Assumes that the assembly has been created by running the prep_sqlserver
        script
        '''
        self.assertTrue(assembly_exists('ToProperCase', self.db_context))
        self.assertFalse(assembly_exists('asdfasdf', self.db_context))
def peer_compare(excel='Y',
                 agg_table='',
                 agg_file='',
                 agg_sheet='',
                 indata='',
                 outdata='',
                 pc_data='',
                 crit_val=1.96,
                 db_context=None,
                 odbcconn=''):
    """ Compares Peer group Scores for OAT with a TTEST and computes the percentage 
        of people in each level for the peer group.
        
        Parameters
        ----------------
        excel : String ('Y' or 'N')
            Required. This should be 'Y' if your aggregate information is an excel file, or 'N' if it is a SQL table.
        
        agg_table : String
            If excel='N' this should be the table name that holds the aggregate information. If excel='Y'
            this is not used.
            
        agg_file : String
            If excel='Y' this should be the path to the aggregate file. If excel='N' this is not used.
            
        agg_sheet : String
            If excel='Y' this should be the sheet name in the aggregate file. If excel='N' this is not used.
            
        indata : String
            Required. This should be the SQL table name that holds the input data.
            
        outdata : String
            Required. This will be the name of the SQL table name to hold the output data.
            
        pc_data : String
            Required. This should be the name of the SQL table that holds the peer districts information.
            
        crit_val : float
            Required. This will be the the cutoff point for calculations. It must be between 1.28 and 3.89.
            
        db_context : DBContext object
            This is the DBContext within which all processing will be done. This specifies the DB connection.
            
        odbcconn : String
            This is the name of the ODBC connection you must setup to your database for R. This can be done in 
            Control Panel -> Administrative Tools -> Data Sources.
        
        Notes
        ---------------
        I had to add ISNULL(NULLIF()) and case conditions because the query is too complex for SQL Server. It was not subsetting
        the data until after doing the select, which would get incorrect values that broke the query planner. Changing all values to 
        safe values handles this, and those values that we changed are then thrown out once it subsets to the data we want to use.
        Also note for the final query we add .0000000001 to the number before rounding. This is an attempt to compensate for the
        "smart" rounding in SAS which allows for a small margin of difference (i.e. 4.4999999999999 rounding to 5).
    """
    #defining variables we will need throughout run
    yes_list = ['Y', 'YES']
    no_list = ['N', 'NO']
    tmp_aggtable_name = 'peercompare_tmp_agg'
    tmp_peertable_t_name = 'peercompare_tmp_t'
    tmp_peertable_mean_name = 'peercompare_tmp_mean'
    tmp_similar_districts_table = 'peercompare_tmp_dist'
    tmp_tables_list = []
    #error checking on params
    if db_context is None:
        raise ValueError("ERROR: You must pass a DBContext")
    if odbcconn.strip() == '':
        raise ValueError("ERROR: You must pass an odbc connection")
    if pc_data.strip() == '':
        raise ValueError("ERROR: You must pass a table name for pc_data")
    run_context = db_context.runContext
    if excel.upper() in yes_list:
        drop_table_if_exists(db_context=db_context, table=tmp_aggtable_name)

        # create a DB table with the information from the aggregate file,
        # then we will add columns "type","rdvalue",and "variable_name" to it so
        # it is a valid aggregate file for the means script.
        reader = SafeExcelReader(run_context,
                                 db_context=db_context,
                                 filename=agg_file,
                                 sheet_name=agg_sheet,
                                 output_table=tmp_aggtable_name)
        reader.createTable()
    elif excel.upper() in no_list:
        if agg_table.strip() == '':
            raise ValueError(
                "ERROR: You must specify an aggregate table name when excel='N'"
            )
        drop_table_if_exists(db_context=db_context, table=tmp_aggtable_name)

        # first we copy the aggregate information to a temp table
        db_context.executeNoResults(
            _COPY_TABLE_QUERY.format(tmp_aggtable_name, agg_table))
    else:
        raise Exception(
            "ERROR: parameter 'excel' must have a value of either 'Y' or 'N'")
    tmptab_tablespec = db_context.getTableSpec(tmp_aggtable_name)
    tmp_tables_list.append(tmp_aggtable_name)
    # now we add the 'levelvar' and 'rdvalue' columns to the table we created. These columns
    # are needed for creating a meansspec from the table.
    # adding levelvar column
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'levelvar', 'varchar(10)'))
    # adding rdvalue column
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'rdvalue', 'varchar(10)'))

    # add the column variable_name and copy the value from outputvar into it. This column
    # is needed for creating a meansspec from the table.
    length = tmptab_tablespec[db_identifier_quote('inputvar')].data_length + 4
    db_context.executeNoResults(
        _ADD_COLUMN_QUERY.format(tmp_aggtable_name, 'variable_name',
                                 'VARCHAR(' + str(length) + ")"))

    #now update the values for the columns we just added

    # first we change the values for those we are getting Means for
    db_context.executeNoResults(
        _UPDATE_MEAN_INPUT_QUERY.format(tmp_aggtable_name, 'MEAN', "inputvar",
                                        'Mean'))
    # next we update those we are getting T-Tests for
    db_context.executeNoResults(
        _UPDATE_MEAN_INPUT_QUERY.format(tmp_aggtable_name, 'STD',
                                        "RTRIM(LTRIM(inputvar)) + '_seh'",
                                        'TTEST'))

    # get a list of 3-tuples of (subject,wherevar,wherevalue)
    tuples_list = [(x, y, z) for x, y, z in db_context.executeBuffered(
        _SELECT_SUBJECTS_WHEREVAR_QUERY.format(tmp_aggtable_name))]

    cols = [
        '[variable_name]', '[levelvar]', '[wherevar]', '[wherevalue]',
        '[rdvalue]', '[type]', '[subject]'
    ]
    _insert_query = _INSERT_INTO_TABLE_QUERY.format(tmp_aggtable_name,
                                                    ",".join(cols))

    for row in tuples_list:
        subject = row[0]
        wherevar = row[1]
        wherevalue = row[2]
        # harcoding variable_name to pweight_{subject}
        values = [
            'pweight_' + subject, 'dcrxid', wherevar, wherevalue, "", 'N',
            subject
        ]
        db_context.executeNoResults(_insert_query, values)

    # now the aggregate table is ready for means, so run means on it
    means_class = Means(excel='N',
                        agg_ds=tmp_aggtable_name,
                        db_context=db_context,
                        inputds=indata,
                        odbcconn=odbcconn,
                        overwrite='Y')
    means_class.execute()

    if len(means_class.created_tables_list) != 1:
        raise ValueError(
            "Error: Means call returned more than one table. It should only return 'Mean_DCRXID' but it returned "
            + ",".join(means_class.created_tables_list))

    # setup the Peerttest dataset in SAS - This holds the variables we will loop through
    drop_table_if_exists(tmp_peertable_t_name, db_context)
    db_context.executeNoResults(
        _SELECT_PEER_TTEST_QUERY.format(table=tmp_peertable_t_name,
                                        aggtable=tmp_aggtable_name))
    tmp_tables_list.append(tmp_peertable_t_name)

    # setup the Peermean dataset in SAS - This holds the variables we will loop through
    drop_table_if_exists(tmp_peertable_mean_name, db_context)
    db_context.executeNoResults(
        _SELECT_PEER_MEAN_QUERY.format(table=tmp_peertable_mean_name,
                                       aggtable=tmp_aggtable_name))
    tmp_tables_list.append(tmp_peertable_mean_name)

    # getting similar districts into table tmp_similar_districts_table
    drop_table_if_exists(tmp_similar_districts_table, db_context)
    db_context.executeNoResults(
        _SELECT_SIMILAR_DISTRICTS_QUERY.format(
            tmptab=tmp_similar_districts_table,
            disttable=pc_data,
            table=indata))
    tmp_tables_list.append(tmp_similar_districts_table)
    # list of distinct subjects that appear in tmp_aggtable_name
    subj_list = list(set(tup[0] for tup in tuples_list))

    # creating 2 tables, one for means calculations and one for ttest calculations
    vars_list_means = [
        x for x in db_context.executeBuffered(
            _SELECT_PEERMEAN_QUERY.format(table=tmp_peertable_mean_name,
                                          where=''))
    ]
    vars_list_ttest = [
        x for x in db_context.executeBuffered(
            _SELECT_PEERTTEST_QUERY.format(table=tmp_peertable_t_name,
                                           where=''))
    ]
    means_cols = ["F1"] + ["pcount_" + x for x in subj_list]
    ttest_cols = ["F1"]
    for outvar, invar in vars_list_means:
        if outvar is not None and outvar.strip() not in means_cols:
            means_cols.append(outvar.strip())
        if invar is not None and invar.strip() + "_total" not in means_cols:
            means_cols.append(invar.strip() + "_total")
    for predvar, outvar, invar in vars_list_ttest:
        if invar is not None and invar.strip() + "_se" not in ttest_cols:
            ttest_cols.append(invar.strip() + "_se")
    # create temporary table to hold all results from calculations for means, and one for ttest
    tmp_mean_calculated_table = "peermean_tmp_calc_mean"
    tmp_ttest_calculated_table = "peermean_tmp_calc_ttest"
    # first means
    query = "CREATE TABLE " + tmp_mean_calculated_table + " ( [" + """] float,
                        [""".join(means_cols) + "] float )"
    drop_table_if_exists(tmp_mean_calculated_table, db_context)
    db_context.executeNoResults(query)
    tmp_tables_list.append(tmp_mean_calculated_table)
    # now ttest
    query = "CREATE TABLE " + tmp_ttest_calculated_table + " ( [" + """] float,
                        [""".join(ttest_cols) + "] float )"
    drop_table_if_exists(tmp_ttest_calculated_table, db_context)
    db_context.executeNoResults(query)
    tmp_tables_list.append(tmp_ttest_calculated_table)
    # cleanup
    del vars_list_ttest
    del query
    del means_cols
    del ttest_cols

    #define where clause for peermean and peerttest queries
    whereclause = "where subject = '{subj}'"

    # get list of distinct F1 values from out similar districts table
    f1_list = [
        x[0] for x in db_context.executeBuffered(
            _SELECT_F1_QUERY.format(table=tmp_similar_districts_table))
    ]

    # Go through the districts and insert them into the final table one at a time, inserting null for any values not calculated
    for f1 in f1_list:
        # list to hold all the select queries we create. One for the means table and one for the ttest table
        select_list_means = ["A.F1"]
        select_list_ttest = ["A.F1"]
        cols_means = ["F1"]
        cols_ttest = ["F1"]
        select_table_query = _MERGE_SIMILARDISTRICTS.format(
            table=means_class.created_tables_list[0],
            disttable=tmp_similar_districts_table,
            f1=f1)
        ttest_groupby = ["A.F1"]
        for subject in subj_list:
            # means_output_input_list is of the form  [ (outvar,invar) ]
            means_output_input_list = [
                x for x in db_context.executeBuffered(
                    _SELECT_PEERMEAN_QUERY.format(
                        table=tmp_peertable_mean_name,
                        where=whereclause.format(subj=subject)))
            ]
            # ttest_prev_input__list is of the form [ (predvar,outputvar,inputvar) ]
            ttest_prev_input__list = [
                x for x in db_context.executeBuffered(
                    _SELECT_PEERTTEST_QUERY.format(table=tmp_peertable_t_name,
                                                   where=whereclause.format(
                                                       subj=subject)))
            ]
            _count = _SUM_QUERY.format(subject=subject)
            for outvar, invar in means_output_input_list:
                total_query = "SUM( ISNULL([pweight_{subj}],0) * ISNULL([{invar}],0) )".format(
                    subj=subject, invar=invar)
                select_list_means.append(total_query + " as [" + invar +
                                         "_total]")
                select_list_means.append(
                    _MEAN_CASE_QUERY.format(count=_count,
                                            total=total_query,
                                            alias=db_identifier_quote(outvar)))
                cols_means.append("[" + invar + "_total]")
                cols_means.append(db_identifier_quote(outvar))
                # adding count variable
                if _count + " as [pcount_{subj}]".format(
                        subj=subject) not in select_list_means:
                    select_list_means.append(_count +
                                             " as [pcount_{subj}]".format(
                                                 subj=subject))
                    cols_means.append("[pcount_{subj}]".format(subj=subject))
            for predvar, outvar, inputvar in ttest_prev_input__list:
                se_query = _SE_QUERY.format(subj=subject, inputvar=inputvar)
                select_list_ttest.append(
                    _COUNT_CASE_QUERY.format(count="pcount_" + subject,
                                             se=se_query,
                                             pred=predvar,
                                             alias="[" + inputvar + "_se]"))
                cols_ttest.append("[" + inputvar + "_se]")
                ttest_groupby.append(predvar)
            if "pcount_" + subject not in ttest_groupby:
                ttest_groupby.append("pcount_" + subject)
        # now insert into means table
        insert_query = _INSERT_MEANS_CALCS_QUERY.format(
            table=tmp_mean_calculated_table,
            cols="""
                                                ,""".join(cols_means),
            selects="""
                                                ,""".join(select_list_means),
            froms=select_table_query)
        db_context.executeNoResults(insert_query)
        #now insert into ttest table
        insert_query = _INSERT_TTEST_CALCS_QUERY.format(
            table=tmp_ttest_calculated_table,
            cols="""
                                                ,""".join(cols_ttest),
            selects="""
                                                ,""".join(select_list_ttest),
            froms=select_table_query,
            meanstab=tmp_mean_calculated_table,
            groupby=",".join(ttest_groupby))
        db_context.executeNoResults(insert_query)

    # now we go through these output tables and get the t-values
    # setup initial select query list
    select_query_list = ["C.dcrxid"] + list(
        set([
            "ROUND(" + x + " + .0000000001,0) as [" + x + "]"
            for x, y in vars_list_means
        ]))
    for predvar, outvar, invar, subject in [
            x for x in db_context.executeBuffered(
                _SELECT_PEERTTEST_QUERY2.format(table=tmp_peertable_t_name,
                                                where=''))
    ]:
        select_query_list.append(
            _TTEST_CASE_QUERY.format(invar=invar,
                                     pred=predvar,
                                     critval=crit_val,
                                     alias=outvar,
                                     subj=subject))
    final_query = _TTEST_CALCS_QUERY.format(
        sels=",".join(list(set(select_query_list))),
        table=outdata,
        meanstab=tmp_mean_calculated_table,
        ttesttab=tmp_ttest_calculated_table,
        dcrxidtable=means_class.created_tables_list[0])
    drop_table_if_exists(outdata, db_context)
    db_context.executeNoResults(final_query)

    #cleanup temporary tables
    for table in tmp_tables_list:
        drop_table_if_exists(table, db_context)
# Example #15
class Test(unittest.TestCase):
    """Unit tests for SafeExcelReader: importing .xls/.csv data into the
    test database and the Excel column-letter conversion helpers."""

    def setUp(self):
        # Fresh contexts per test; clear_all drops leftover tables so each
        # test starts against an empty database.
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir

    def testXLS(self):
        """Importing an .xls sheet creates a table whose primary key is the
        single [import_order] column."""
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

        table_spec = self.db_context.getTableSpec('Data1')
        primary_key = table_spec.primary_key
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(len(primary_key), 1)
        self.assertEqual(primary_key[0].field_name, '[import_order]')

    def testConstructor(self):
        """All reader options may be supplied via keyword arguments to the
        constructor; the output table name is lower-cased and bracketed."""
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir,
                                                       _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()

        for name in get_table_names(self.db_context):
            self.assertEqual(
                '[temp1]', name,
                "Found name '{name}' instead of '[temp1]'".format(name=name))

    def testIntoPython(self):
        """getRows() yields one row object per spreadsheet data row."""
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        rows = list(self.reader.getRows())
        self.assertEqual(300, len(rows),
                         'Expected 300 rows, found %d' % len(rows))

    def testCSVIntoDB(self):
        """Importing a delimited text file honors the delimiter/skip/range
        options and produces the expected columns and row count."""
        self.reader.filename = os.path.join(self.testDataDir, _CSV_FILE)
        self.reader.outputTable = "CSV1"
        self.reader.scan_all = True
        self.reader.getNames = True
        self.reader.delimiter = "|"
        self.reader.skip = 0
        self.reader.range = (0, 0, 100, 1024)
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('CSV1')
        for col_name in _CSV_COLUMNS:
            self.assertTrue(col_name in table_spec,
                            "Missing column {}".format(col_name))

        self.assertEqual(n_obs(table_spec), 100,
                         "Wrong number of rows in imported data")

    def test_xlcol(self):
        """_xlcol maps 0-based column numbers to Excel column letters."""
        # A list comprehension (not bare map()) is required: under Python 3
        # map() returns an iterator, which never compares equal to a list,
        # so the original assertion could not pass.
        letters = [self.reader._xlcol(x) for x in (0, 3, 25)]
        self.assertEqual(letters, ['A', 'D', 'Z'],
                         'xlcol - translating numbers to letters - failed')

    def test_xlcolnumber(self):
        """_xlcolnumber maps Excel column letters back to 0-based numbers."""
        # Same Python 3 map()-iterator fix as test_xlcol; assertEqual also
        # reports the mismatched values on failure, unlike assertTrue.
        numbers = [self.reader._xlcolnumber(x) for x in ('A', 'D', 'Z')]
        self.assertEqual(
            numbers, [0, 3, 25],
            'xlcolnumber - translating letters to numbers - failed')