def setUp(self):
    self.run_context = SuiteContext("unittest")
    self.db_context = self.run_context.getDBContext("unittest")
    self.static_context = self.run_context.getDBContext("static")
    self.answer_dir = os.path.join(self.run_context.logs_dir, 'ttest_level_tests')
    if not os.path.exists(self.answer_dir):
        os.makedirs(self.answer_dir)
    self.data_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'input_data')
    self.specimen_dir = os.path.join(self.run_context.tests_safe_dir, 'ttest', 'sas_outputs')

    # Equivalent SAS setup:
    # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel";
    # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls;
    # %let sheet=ttestlevel;
    # %SafeExcelRead(filename=&agg_file., sheetname=&sheet., DS_out=aggds);
    self.run_context.debug("Reading data for ttest_level tests")
    agg_file = os.path.join(self.data_dir, _AGG_FILE)
    reader = SafeExcelReader(self.run_context, agg_file, "ttestlevel", scan_all=True)
    self.agg_ds = [row for row in reader.getRows()]

    # Import the input datasets
    reader.db_context = self.static_context
    for (grade, filename, table_name, sheet_name) in _GRADE_FILES:
        if not table_exists(table_name, self.static_context):
            self.run_context.debug("Reading data for grade {}".format(grade))
            reader.filename = os.path.join(self.data_dir, filename)
            reader.outputTable = table_name
            reader.sheetName = sheet_name
            reader.createTable()
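# The fixture above depends on two module-level constants defined outside
# this excerpt. A hypothetical sketch of their shape, inferred from how
# setUp() unpacks them: only the aggregation workbook name is taken from
# the SAS %let comment; the grade-file entries are placeholders.
_AGG_FILE = 'HI Spring 2008 Aggregations_Melissa.xls'
_GRADE_FILES = [
    # (grade, filename, table_name, sheet_name)
    (3, 'HI_Spring_2008_Grade_3.xls', 'student_g3', 'Sheet1'),
    (4, 'HI_Spring_2008_Grade_4.xls', 'student_g4', 'Sheet1'),
]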
def excel_to_vars(run_context, filename, sheetname):
    if not run_context:
        raise ValueError('run_context is null or empty.')
    if not filename:
        raise ValueError('filename is null or empty.')
    if not sheetname:
        raise ValueError('sheetname is null or empty.')
    reader = SafeExcelReader(run_context=run_context, filename=filename, sheet_name=sheetname)
    rows = [row for row in reader.getRows()]  # copy the row values
    return rows
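# Minimal usage sketch for excel_to_vars(), assuming a 'unittest' suite
# context; the workbook path and sheet name are placeholders.
if __name__ == '__main__':
    rc = SuiteContext('unittest')
    control_file = os.path.join(rc.tests_safe_dir, 'ttest', 'input_data', 'controls.xls')
    for row in excel_to_vars(rc, control_file, 'ttestlevel'):
        rc.debug('read control row: {}'.format(row))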
def readAggData(self):
    # Validate the input file
    self.ds_in = get_table_spec(self.ds_in, self.db_context)
    self.ds_in.populate_from_connection()
    self.db_context = self.ds_in.db_context
    self.run_context = self.db_context.runContext
    self.run_context.debug("processing control file")
    if not table_exists(self.ds_in):
        raise ValueError("Input dataset {} does not exist".format(self.ds_in))

    # Read the control file
    # SAS 3-9
    if self.use_excel:
        reader = SafeExcelReader(self.run_context, self.agg_ds, self.agg_sheet)
        self.agg_data = [row for row in reader.getRows()]
    else:
        self.agg_ds = get_table_spec(self.agg_ds, self.db_context)
        self.agg_data = dump(self.agg_ds)

    # Validate the control file columns
    # SAS 10-28
    missing_vars = set()
    for var_name in ['outputvar', 'inputvar', 'targetlevel', 'targetid',
                     'wheret', 'wheret_value', 'parentlevel', 'parentid',
                     'wherep', 'wherep_value', 'critval']:
        if var_name not in self.agg_data[0]:
            missing_vars.add(var_name)
    if missing_vars:
        raise ValueError("TTest control sheet lacks required columns: {:', '}"
                         .format(Joiner(missing_vars)))

    # Validate existence of requested columns
    # SAS 29-86
    for row in self.agg_data:
        if row.wheret is None:
            row.wheret = []
        else:
            row.wheret = [x.strip().lower() for x in row.wheret.strip().split('*')]
        if row.wherep is None:
            row.wherep = []
        else:
            row.wherep = [x.strip().lower() for x in row.wherep.strip().split('*')]
        if row.wheret_value is None:
            row.wheret_value = []
        else:
            row.wheret_value = [x.strip() for x in row.wheret_value.strip().split(' ')]
        if row.wherep_value is None:
            row.wherep_value = []
        else:
            row.wherep_value = [x.strip() for x in row.wherep_value.strip().split(' ')]
        row.inputvar = row.inputvar.lower().strip()
        row.targetid = row.targetid.lower().strip()
        row.parentid = row.parentid.lower().strip()
        row.targetlevel = row.targetlevel.lower().strip()
        row.parentlevel = row.parentlevel.lower().strip()
        for var_name in (row.wheret + row.wherep
                         + [row.inputvar, row.targetid, row.parentid]):
            if var_name != '' and var_name not in self.ds_in:
                missing_vars.add(var_name)
    if missing_vars:
        raise ValueError("TTest input data lacks required variables: {:', '}"
                         .format(Joiner(missing_vars)))

    # Sort control data
    # SAS 87-90
    self.agg_data.sort(key=lambda row: (row.targetlevel, row.parentlevel))

    # Check for consistency across "target" and "parent" variables.
    # SAS 91-222
    last_targetlevel = _NONE_LEVEL
    last_parentlevel = _NONE_LEVEL
    self.target_levels = []
    messages = []
    for row in self.agg_data:
        wheret = tuple(row.wheret)
        wheret_value = tuple(row.wheret_value)
        if len(wheret) != len(wheret_value):
            messages.append('Number of wheret_value items must match number of '
                            'wheret items ("{0}" vs "{1}")'
                            .format(row.wheret, row.wheret_value))
        if row.targetlevel != last_targetlevel.level:
            last_targetlevel = LevelData(row.targetlevel, row.targetid, wheret, wheret_value)
            self.target_levels.append(last_targetlevel)
            last_parentlevel = _NONE_LEVEL
            # Create an output table in which to accumulate the results
            table_name = 'ttest_' + row.targetlevel
            last_targetlevel.output_table = TableSpec(self.db_context, table_name)
            last_targetlevel.output_table.add(self.ds_in[row.targetid].clone())
        else:
            last_targetlevel.check(row.targetid, wheret, messages)
        wherep = tuple(row.wherep)
        wherep_value = tuple(row.wherep_value)
        if len(wherep) != len(wherep_value):
            messages.append('Number of wherep_value items must match number of '
                            'wherep items ("{0}" vs "{1}")'
                            .format(row.wherep, row.wherep_value))
        if row.parentlevel != last_parentlevel.level:
            last_parentlevel = LevelData(row.parentlevel, row.parentid, wherep, wherep_value)
            last_targetlevel.contents.append(last_parentlevel)
        else:
            last_parentlevel.check(row.parentid, wherep, messages)
        last_parentlevel.contents.append(row)
        last_targetlevel.output_table.add(FieldSpec(row.outputvar, 'TINYINT'))
        try:
            row.critval = float(row.critval)
            if not MIN_CRITVAL <= row.critval <= MAX_CRITVAL:
                messages.append("Bad critical value {} is not between {} and {}"
                                .format(row.critval, MIN_CRITVAL, MAX_CRITVAL))
        except ValueError:
            messages.append("Critical value {} is not a float".format(row.critval))
        try:
            row.outputvar = db_identifier_quote(row.outputvar)
        except ValueError:
            messages.append("Output variable name {} is not a valid database identifier"
                            .format(row.outputvar))
        try:
            row.targetlevel = db_identifier_quote(row.targetlevel)
        except ValueError:
            messages.append("Target level name {} is not a valid database identifier"
                            .format(row.targetlevel))
    for message in messages:
        self.run_context.error(message)
    if messages:
        raise ValueError("Invalid inputs to ttest macro. See log for details")
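# Both error paths in readAggData() format the missing-variable set with
# "{:', '}".format(Joiner(...)), which requires a Joiner whose __format__
# treats the format spec as the separator. A minimal sketch of that assumed
# contract (not necessarily the project's implementation):
class Joiner(object):
    def __init__(self, items):
        self.items = items

    def __format__(self, spec):
        # Join the items with the format spec as separator, so that
        # "{:', '}".format(Joiner(['a', 'b'])) yields "a', 'b".
        return spec.join(str(item) for item in self.items)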
def read_spec_file(filename, merge_def):
    """Populate the field definitions of a :class:`MergeDef`.

    Reads field definitions from a spec file and adds the appropriate
    :class:`MergeFieldSpec` items to the merge_def. The left_table,
    right_table and db_context properties of the :class:`MergeDef` must
    already be set before calling this method.

    Parameters
    ----------
    filename : str
        Name of the merge spec file.

    merge_def : :class:`MergeDef`
        The merge definition object into which the field definitions will
        be read.

    Returns
    -------
    None
    """
    messages = []
    db_context = merge_def.db_context
    run_context = db_context.runContext
    if not merge_def.get_actual_tables(messages):
        for message in messages:
            run_context.error(message)
        raise ValueError("Input tables not properly specified in MergeDef")

    reader = SafeExcelReader(run_context, filename, sheet_name=0, get_names=True)
    del merge_def[:]  # clear any existing field definitions

    # Checklists to confirm that all variables from each input table appear
    # exactly once
    n_occurred_left = {}
    for field in merge_def.left_input_table.iterkeys():
        n_occurred_left[field] = 0
    n_occurred_right = {}
    for field in merge_def.right_input_table.iterkeys():
        n_occurred_right[field] = 0

    # Read the left side of the merge first
    required_keys = merge_def.required_merge_keys = []
    optional_keys = merge_def.optional_merge_keys = []
    fuzzy_keys = merge_def.fuzzy_merge_keys = []
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
            _extract_field_properties(row)
        if input_name not in merge_def.left_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.left_input_table))
        left_field = merge_def.left_input_table[input_name]
        n_occurred_left[left_field.field_name] += 1
        if field_name != '<drop>':
            if field_name in merge_def:
                raise ValueError('Attempting to create two merge fields with the same name')
            merge_field = MergeFieldSpec(left_field, None, None)
            merge_field.field_name = field_name
            merge_def.add(merge_field)
            _write_type(merge_field, data_type, data_length)
            if priority == 'first':
                merge_field.priority_field = PRIORITY_LEFT
            elif priority == 'firstnonmissing':
                merge_field.priority_field = PRIORITY_LEFT_NONMISSING
            elif priority == 'second':
                # Note, this may later get changed to PRIORITY_RIGHT_NONMISSING
                merge_field.priority_field = PRIORITY_RIGHT
            elif priority is None or priority == '':
                merge_field.priority_field = PRIORITY_LEFT_ONLY
            else:
                raise ValueError(
                    'Found priority {}; must be one of "FIRST", "SECOND", '
                    'or "FIRST NON-MISSING"'.format(priority))
        if key_assignment is not None and key_assignment != "":
            key_field = MergeFieldSpec(left_field, None, None)
            key_field.field_name = key_assignment
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError("Did not know how to interpret key expression {}"
                                 .format(key_assignment))
            key_type, key_nbr = match.group(1, 2)
            if key_type in ("primary", "required"):
                _add_key(required_keys, key_field, key_nbr, key_assignment)
            elif key_type in ("secondary", "optional"):
                _add_key(optional_keys, key_field, key_nbr, key_assignment)
            elif key_type == "fuzzy":
                _add_fuzzy_key(fuzzy_keys, key_field, key_nbr, key_assignment)
            else:
                raise AssertionError("This error really should not happen")

    # Now read the right side of the merge
    reader.sheetName = 1
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
            _extract_field_properties(row)
        if input_name not in merge_def.right_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.right_input_table))
        right_field = merge_def.right_input_table[input_name]
        n_occurred_right[right_field.field_name] += 1
        if field_name != '<drop>':
            if field_name not in merge_def:
                merge_field = MergeFieldSpec(None, right_field, PRIORITY_RIGHT_ONLY)
                merge_field.field_name = field_name
                merge_def.add(merge_field)
                _write_type(merge_field, data_type, data_length)
                if not (priority is None or priority == ''):
                    raise ValueError(
                        "Field exists only on right side of merge: must have a blank priority")
            else:
                merge_field = merge_def[field_name]
                merge_field.right_field = copy.copy(right_field)

                # Check for compatible type assignment
                if merge_field.basic_type != data_type:
                    raise ValueError(
                        "Incompatible data types for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.basic_type, data_type))
                if merge_field.is_charish and merge_field.data_length != data_length:
                    raise ValueError(
                        "Data lengths differ for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.data_length, data_length))

                # Check for consistent priority assignment
                if merge_field.priority_field in (PRIORITY_LEFT, PRIORITY_LEFT_NONMISSING) \
                        and priority != 'second':
                    raise ValueError("Inconsistent priority designations for field {}"
                                     .format(merge_field))
                if merge_field.priority_field == PRIORITY_RIGHT and priority == 'second':
                    raise ValueError("Inconsistent priority designations for field {}"
                                     .format(merge_field))
                if merge_field.priority_field == PRIORITY_LEFT_ONLY:
                    raise ValueError(
                        "Blank priority designation not permitted when field "
                        "appears on both sides of merge")
                if priority == 'firstnonmissing':
                    merge_field.priority_field = PRIORITY_RIGHT_NONMISSING

        # Do key assignment
        if key_assignment is not None and key_assignment != "":
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError("Did not know how to interpret key expression {}"
                                 .format(key_assignment))
            key_type, key_nbr = match.group(1, 2)
            key_nbr = int(key_nbr) - 1
            key_field = None
            if key_type in ("primary", "required"):
                if key_nbr < len(required_keys):
                    key_field = required_keys[key_nbr]
            elif key_type in ("secondary", "optional"):
                if key_nbr < len(optional_keys):
                    key_field = optional_keys[key_nbr]
            elif key_type == "fuzzy":
                if key_nbr < len(fuzzy_keys):
                    # Claim the first fuzzy key slot without a right-side field
                    for key_field in fuzzy_keys[key_nbr]:
                        if key_field.right_field is None:
                            break
            else:
                raise AssertionError("This error really should not happen")
            if key_field is None:
                raise ValueError("Key {} was specified only on right side of merge"
                                 .format(key_assignment))
            if key_field.right_field is not None:
                raise ValueError(
                    "Too many keys defined on right side of merge with key designation {}"
                    .format(key_assignment))
            key_field.right_field = right_field

    # Confirm that each input variable was processed exactly once.
    succeed = True
    for k, v in n_occurred_left.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from left input table must occur exactly once "
                "on spec sheet. {} appeared {} times".format(k, v))
    for k, v in n_occurred_right.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from right input table must occur exactly once "
                "on spec sheet. {} appeared {} times".format(k, v))
    if not succeed:
        raise ValueError("Missing or duplicate variables on merge spec sheet")
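# read_spec_file() assumes _KEY_ASSIGNMENT_RE captures exactly two groups:
# a key type and a 1-based key number (e.g. "required 1", "fuzzy 2"). A
# hypothetical pattern consistent with the branches above; the real module's
# regex may differ.
import re
_KEY_ASSIGNMENT_RE = re.compile(
    r'^\s*(primary|required|secondary|optional|fuzzy)\s*(\d+)\s*$',
    re.IGNORECASE)
# Example: _KEY_ASSIGNMENT_RE.match('required 1').group(1, 2)
# yields ('required', '1').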
class Test(unittest.TestCase):

    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir

    def testXLS(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('Data1')
        primary_key = table_spec.primary_key
        self.assertEqual(len(primary_key), 1)
        self.assertEqual(primary_key[0].field_name, '[import_order]')

    def testConstructor(self):
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir, _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()
        for name in get_table_names(self.db_context):
            self.assertEqual('[temp1]', name,
                             "Found name '{name}' instead of '[temp1]'".format(name=name))

    def testIntoPython(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        rows = [row for row in self.reader.getRows()]
        self.assertEqual(300, len(rows), 'Expected 300 rows, found %d' % len(rows))

    def testCSVIntoDB(self):
        self.reader.filename = os.path.join(self.testDataDir, _CSV_FILE)
        self.reader.outputTable = "CSV1"
        self.reader.scan_all = True
        self.reader.getNames = True
        self.reader.delimiter = "|"
        self.reader.skip = 0
        self.reader.range = (0, 0, 100, 1024)
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('CSV1')
        for col_name in _CSV_COLUMNS:
            self.assertTrue(col_name in table_spec, "Missing column {}".format(col_name))
        self.assertEqual(n_obs(table_spec), 100, "Wrong number of rows in imported data")

    def test_xlcol(self):
        letters = [self.reader._xlcol(x) for x in [0, 3, 25]]
        self.assertEqual(letters, ['A', 'D', 'Z'],
                         'xlcol - translating numbers to letters - failed')

    def test_xlcolnumber(self):
        numbers = [self.reader._xlcolnumber(x) for x in ['A', 'D', 'Z']]
        self.assertEqual(numbers, [0, 3, 25],
                         'xlcolnumber - translating letters to numbers - failed')
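# Standard entry point so the suite can be run directly as a script.
if __name__ == '__main__':
    unittest.main()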