def setUp(self):
    """Build the contexts, directories and input data used by the ttest_level tests.

    Creates the "unittest" suite context and its "unittest" and "static"
    database contexts, makes sure the answer directory exists, reads the
    aggregation sheet into ``self.agg_ds`` and imports every grade file
    listed in ``_GRADE_FILES`` whose table is not already present in the
    static database context.
    """
    ctx = SuiteContext("unittest")
    self.run_context = ctx
    self.db_context = ctx.getDBContext("unittest")
    self.static_context = ctx.getDBContext("static")

    # Directory that receives the output of these tests.
    self.answer_dir = os.path.join(ctx.logs_dir, 'ttest_level_tests')
    if not os.path.exists(self.answer_dir):
        os.makedirs(self.answer_dir)

    self.data_dir = os.path.join(ctx.tests_safe_dir, 'ttest', 'input_data')
    self.specimen_dir = os.path.join(ctx.tests_safe_dir, 'ttest', 'sas_outputs')

    # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel";
    # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls;
    # %let sheet=ttestlevel;
    # %SafeExcelRead(filename=&agg_file., sheetname =&sheet., DS_out =aggds);
    ctx.debug("Reading data for ttest_level tests")
    agg_file = os.path.join(self.data_dir, _AGG_FILE)
    reader = SafeExcelReader(ctx, agg_file, "ttestlevel", scan_all=True)
    self.agg_ds = list(reader.getRows())

    # Import the input datasets, reusing the same reader but pointing it
    # at the static database context.
    reader.db_context = self.static_context
    for grade, filename, table_name, sheet_name in _GRADE_FILES:
        if table_exists(table_name, self.static_context):
            # Table was loaded by an earlier run; nothing to do.
            continue
        ctx.debug("Reading data for grade {}".format(grade))
        reader.filename = os.path.join(self.data_dir, filename)
        reader.outputTable = table_name
        reader.sheetName = sheet_name
        reader.createTable()
def read_spec_file(filename, merge_def):
    """Populate the field definitions of a :class:`MergeDef`.

    Reads the field definitions from a merge spec file and adds the
    appropriate :class:`MergeFieldSpec` items to ``merge_def``. Sheet 0 of
    the spec file is read for the left input table and sheet 1 for the right
    input table.

    The left_table, right_table and db_context properties of the
    :class:`MergeDef` must already be set before calling this method. Any
    fields already present in ``merge_def`` are removed, and its
    ``required_merge_keys``, ``optional_merge_keys`` and ``fuzzy_merge_keys``
    lists are replaced.

    Parameters
    ----------
    filename : str
        Name of the merge spec file.

    merge_def : :class:`MergeDef`
        The merge definition object into which the field definitions will be
        read.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the input tables cannot be resolved, a spec row names a column
        that is not in the corresponding input table, two merge fields share
        a name, a priority or key designation is invalid or inconsistent
        between the two sheets, data types or lengths disagree between the
        two sheets, or any input column does not appear exactly once on its
        sheet.
    """
    messages = []
    db_context = merge_def.db_context
    run_context = db_context.runContext
    if not merge_def.get_actual_tables(messages):
        for message in messages:
            run_context.error(message)
        raise ValueError("Input tables not properly specified in MergeDef")

    reader = SafeExcelReader(run_context, filename, sheet_name=0, get_names=True)
    del merge_def[:]

    # Checklists to confirm that all variables from each input table appear
    # exactly once
    n_occurred_left = {}
    for field in merge_def.left_input_table.iterkeys():
        n_occurred_left[field] = 0
    n_occurred_right = {}
    for field in merge_def.right_input_table.iterkeys():
        n_occurred_right[field] = 0

    # Read the left side of the merge first
    required_keys = merge_def.required_merge_keys = []
    optional_keys = merge_def.optional_merge_keys = []
    fuzzy_keys = merge_def.fuzzy_merge_keys = []

    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
            _extract_field_properties(row)
        if input_name not in merge_def.left_input_table:
            # Report the input column that was looked up, not the output name.
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.left_input_table))
        left_field = merge_def.left_input_table[input_name]
        n_occurred_left[left_field.field_name] += 1

        if field_name != '<drop>':
            if field_name in merge_def:
                raise ValueError(
                    'Attempting to create two merge fields with the same name')
            merge_field = MergeFieldSpec(left_field, None, None)
            merge_field.field_name = field_name
            merge_def.add(merge_field)
            _write_type(merge_field, data_type, data_length)
            if priority == 'first':
                merge_field.priority_field = PRIORITY_LEFT
            elif priority == 'firstnonmissing':
                merge_field.priority_field = PRIORITY_LEFT_NONMISSING
            elif priority == 'second':
                # Note, this may later get changed to PRIORITY_RIGHT_NONMISSING
                merge_field.priority_field = PRIORITY_RIGHT
            elif priority is None or priority == '':
                merge_field.priority_field = PRIORITY_LEFT_ONLY
            else:
                raise ValueError(
                    "Found priority {}; must be one of \"FIRST\", \"SECOND\", or \"FIRST NON-MISSING\""
                    .format(priority))

        # Keys are built from the input column itself, not from merge_field.
        if key_assignment is not None and key_assignment != "":
            key_field = MergeFieldSpec(left_field, None, None)
            key_field.field_name = key_assignment
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError(
                    "Did not know how to interpret key expression {}".format(
                        key_assignment))
            # NOTE(review): groups((1, 2)) passes (1, 2) as the *default* for
            # non-participating groups; match.group(1, 2) may have been
            # intended. Behaviour kept as-is -- confirm against the regex.
            key_type, key_nbr = match.groups((1, 2))
            if key_type in ("primary", "required"):
                _add_key(required_keys, key_field, key_nbr, key_assignment)
            elif key_type in ("secondary", "optional"):
                _add_key(optional_keys, key_field, key_nbr, key_assignment)
            elif key_type == "fuzzy":
                _add_fuzzy_key(fuzzy_keys, key_field, key_nbr, key_assignment)
            else:
                raise AssertionError("This error really should not happen")

    # Now read the right side of the merge
    reader.sheetName = 1
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
            _extract_field_properties(row)
        if input_name not in merge_def.right_input_table:
            # Report the input column that was looked up, not the output name.
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.right_input_table))
        right_field = merge_def.right_input_table[input_name]
        n_occurred_right[right_field.field_name] += 1

        if field_name != '<drop>':
            if field_name not in merge_def:
                # Field was not defined by the left sheet: right-only field.
                merge_field = MergeFieldSpec(None, right_field, PRIORITY_RIGHT_ONLY)
                merge_field.field_name = field_name
                merge_def.add(merge_field)
                _write_type(merge_field, data_type, data_length)
                if not (priority is None or priority == ''):
                    raise ValueError(
                        "Field exists only on right side of merge: must have a blank priority"
                    )
            else:
                # Field was already defined by the left sheet: attach the
                # right column and cross-check the two definitions.
                merge_field = merge_def[field_name]
                merge_field.right_field = copy.copy(right_field)

                # Check for compatible type assignment
                if merge_field.basic_type != data_type:
                    raise ValueError(
                        "Incompatible data types for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.basic_type, data_type))
                if merge_field.is_charish and merge_field.data_length != data_length:
                    raise ValueError(
                        "Data lengths differ for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.data_length, data_length))

                # Check for consistent priority assignment
                if merge_field.priority_field in (PRIORITY_LEFT, PRIORITY_LEFT_NONMISSING) and \
                        priority != 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_RIGHT and \
                        priority == 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_LEFT_ONLY:
                    raise ValueError(
                        "Blank priority designation not permitted when field appears on both sides of merge"
                    )
                if priority == 'firstnonmissing':
                    merge_field.priority_field = PRIORITY_RIGHT_NONMISSING

        # Do key assignment
        if key_assignment is not None and key_assignment != "":
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError(
                    "Did not know how to interpret key expression {}".
                    format(key_assignment))
            # NOTE(review): see the left-side loop; groups((1, 2)) supplies a
            # default rather than selecting groups 1 and 2.
            key_type, key_nbr = match.groups((1, 2))
            key_idx = int(key_nbr) - 1
            key_field = None
            # The lower bound stops a key number of 0 (index -1) from being
            # silently paired with the *last* left-side key.
            if key_type in ("primary", "required"):
                if 0 <= key_idx < len(required_keys):
                    key_field = required_keys[key_idx]
            elif key_type in ("secondary", "optional"):
                if 0 <= key_idx < len(optional_keys):
                    key_field = optional_keys[key_idx]
            elif key_type == "fuzzy":
                if 0 <= key_idx < len(fuzzy_keys):
                    # Take the first fuzzy key in this slot that has no right
                    # column yet; if all are taken, key_field is left on the
                    # last one and the "too many keys" check below fires.
                    for key_field in fuzzy_keys[key_idx]:
                        if key_field.right_field is None:
                            break
            else:
                raise AssertionError("This error really should not happen")
            if key_field is None:
                raise ValueError(
                    "Key {} was specified only on right side of merge".
                    format(key_assignment))
            if key_field.right_field is not None:
                raise ValueError(
                    "Too many keys defined on right side of merge with key designation {}"
                    .format(key_assignment))
            key_field.right_field = right_field

    # Confirm that each input variable was processed exactly once.
    succeed = True
    for k, v in n_occurred_left.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from left input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    for k, v in n_occurred_right.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from right input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    if not succeed:
        raise ValueError("Missing or duplicate variables on merge spec sheet")