    def setUp(self):
        
        self.run_context = SuiteContext( "unittest" )
        self.db_context = self.run_context.getDBContext( "unittest" )
        self.static_context = self.run_context.getDBContext( "static" )

        self.answer_dir = os.path.join( self.run_context.logs_dir, 'ttest_level_tests' )
        if not os.path.exists( self.answer_dir ):
            os.makedirs( self.answer_dir )
        
        self.data_dir = os.path.join( self.run_context.tests_safe_dir,
                                      'ttest', 'input_data' )
        
        self.specimen_dir = os.path.join( self.run_context.tests_safe_dir,
                                      'ttest', 'sas_outputs' )

        # libname ttest "H:\share\CSSC Folder\Score Report Group\Test Data\lib_TTestLevel";
        # %let agg_file = &cvsroot.\ScoreReportMacros\UnitTested\lib_TTestLevel\test\HI Spring 2008 Aggregations_Melissa.xls;
        # %let sheet=ttestlevel;
        # %SafeExcelRead(filename=&agg_file., sheetname =&sheet., DS_out =aggds);
        self.run_context.debug( "Reading data for ttest_level tests" )
        agg_file = os.path.join( self.data_dir, _AGG_FILE)
        reader = SafeExcelReader( self.run_context,
                agg_file, "ttestlevel", scan_all = True )
        self.agg_ds = [ row for row in reader.getRows() ]
        
        # Import the input datasets
        reader.db_context = self.static_context
        for ( grade, filename, table_name, sheet_name ) in _GRADE_FILES:
            if not table_exists( table_name, self.static_context ):
                self.run_context.debug( "Reading data for grade {}".format( grade ) )
                reader.filename = os.path.join( self.data_dir, filename )
                reader.outputTable = table_name
                reader.sheetName = sheet_name
                reader.createTable()
Example 2
def excel_to_vars( run_context, filename, sheetname ):
    if not run_context:
        raise ValueError( 'run_context is null or empty.' )
    if not filename:
        raise ValueError( 'filename is null or empty.' )
    if not sheetname:
        raise ValueError( 'sheetname is null or empty.' )
    reader = SafeExcelReader( run_context=run_context, filename=filename, sheet_name=sheetname )
    rows = [ row for row in reader.getRows( ) ]  # copy the row values
    return rows
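
# A minimal usage sketch (hypothetical file and sheet names, for illustration
# only; assumes a SuiteContext like the one built in the setUp above):
#
#   ctx = SuiteContext( 'unittest' )
#   control_rows = excel_to_vars( ctx, 'controls.xls', 'ttestlevel' )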
Example 3
    def readAggData(self):
        
        # Validate the input file
        self.ds_in = get_table_spec( self.ds_in, self.db_context )
        self.ds_in.populate_from_connection()
        self.db_context = self.ds_in.db_context
        self.run_context = self.db_context.runContext
        self.run_context.debug( "processing control file" )
        if not table_exists( self.ds_in ):
            raise ValueError( "Input dataset {} does not exist".format( self.ds_in ) )
        
        # Read the control file
        # SAS 3-9
        if self.use_excel:
            reader = SafeExcelReader( self.run_context, self.agg_ds, self.agg_sheet )
            self.agg_data = [ row for row in reader.getRows() ]
        else:
            self.agg_ds = get_table_spec( self.agg_ds, self.db_context )
            self.agg_data = dump( self.agg_ds )
        
        # Validate the control file columns
        # SAS 10-28
        missing_vars = set()
        for var_name in [ 'outputvar', 'inputvar', 'targetlevel', 'targetid', 'wheret', 'wheret_value', 'parentlevel',
                          'parentid', 'wherep', 'wherep_value', 'critval' ]:
            if var_name not in self.agg_data[0]:
                missing_vars.add( var_name )
        if missing_vars:
            raise ValueError( "TTest control sheet lacks required columns: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Validate existence of requested columns
        # SAS 29-86
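        # wheret/wherep hold '*'-separated lists of filter variables
        # (lower-cased below), while wheret_value/wherep_value hold
        # space-separated lists of the corresponding filter values.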
        for row in self.agg_data:
            if row.wheret is None:
                row.wheret = []
            else:
                row.wheret = [ x.strip().lower() for x in row.wheret.strip().split( '*' )]
                
            if row.wherep is None:
                row.wherep = []
            else:
                row.wherep = [ x.strip().lower() for x in row.wherep.strip().split( '*' )]
                
            if row.wheret_value is None:
                row.wheret_value = []
            else:
                row.wheret_value = [ x.strip() for x in row.wheret_value.strip().split( ' ' )]
                
            if row.wherep_value is None:
                row.wherep_value = []
            else:
                row.wherep_value = [ x.strip() for x in row.wherep_value.strip().split( ' ' )]
                
            row.inputvar = row.inputvar.lower().strip()
            row.targetid = row.targetid.lower().strip()
            row.parentid = row.parentid.lower().strip()
            row.targetlevel = row.targetlevel.lower().strip()
            row.parentlevel = row.parentlevel.lower().strip()

            for var_name in ( row.wheret
                              + row.wherep
                              + [ row.inputvar, row.targetid, row.parentid ] ):
                if var_name != '' and var_name not in self.ds_in:
                    missing_vars.add( var_name )
        if missing_vars:
            raise ValueError( "TTest input data lacks required variables: {:', '}".format( Joiner( missing_vars ) ) )
        
        # Sort control data
        #SAS 87-90
        self.agg_data.sort( key=lambda row : ( row.targetlevel, row.parentlevel ) )
        
        # Check for consistency across "target" and "parent" variables.
        #SAS 91-222
        last_targetlevel = _NONE_LEVEL
        last_parentlevel = _NONE_LEVEL
        self.target_levels = []
        messages = []
        
        for row in self.agg_data:
            wheret = tuple( row.wheret )
            wheret_value = tuple ( row.wheret_value )
            if len( wheret ) != len( wheret_value ):
                messages.append( 'Number of wheret_value items must match number of wheret items ("{0}" vs "{1}")'.format(
                        row.wheret, row.wheret_value ) )
            if row.targetlevel != last_targetlevel.level:
                last_targetlevel = LevelData( row.targetlevel, row.targetid, wheret, wheret_value )
                self.target_levels.append( last_targetlevel )
                last_parentlevel = _NONE_LEVEL
                
                # Create an output table in which to accumulate the results
                table_name = 'ttest_' + row.targetlevel
                last_targetlevel.output_table = TableSpec( self.db_context, table_name )
                last_targetlevel.output_table.add( self.ds_in[ row.targetid ].clone() )
            else:
                last_targetlevel.check( row.targetid, wheret, messages )
            
            wherep = tuple( row.wherep )
            wherep_value = tuple ( row.wherep_value )
            if len( wherep ) != len( wherep_value ):
                messages.append( 'Number of wherep_value items must match number of wherep items ("{0}" vs "{1}")'.format(
                        row.wherep, row.wherep_value ) )
            if row.parentlevel != last_parentlevel.level:
                last_parentlevel = LevelData( row.parentlevel, row.parentid, wherep, wherep_value )
                last_targetlevel.contents.append( last_parentlevel )
            else:
                last_parentlevel.check( row.parentid, wherep, messages )
                
            last_parentlevel.contents.append( row )
            last_targetlevel.output_table.add( FieldSpec( row.outputvar, 'TINYINT' ) )
            
            try:
                row.critval = float( row.critval )
                if not MIN_CRITVAL <= row.critval <= MAX_CRITVAL:
                    messages.append( "Bad critical value {} is not between {} and {}".format(
                            row.critval, MIN_CRITVAL, MAX_CRITVAL ) )
            except ValueError:
                messages.append( "Critical value {} is not a float".format( row.critval ) )
                
            try:
                row.outputvar = db_identifier_quote( row.outputvar )
            except ValueError:
                messages.append( "Output variable name {} is not a valid database identifier".format( row.outputvar ) )
                
            try:
                row.targetlevel = db_identifier_quote( row.targetlevel )
            except ValueError:
                messages.append( "Target level name {} is not a valid database identifier".format( row.targetlevel ) )
                
        for message in messages:
            self.run_context.error( message )
        
        if messages:
            raise ValueError( "Invalid inputs to ttest macro. See log for details" )
def read_spec_file(filename, merge_def):
    """Populate the field definitions of a :class:`MergeDef`
    
    Reads field definitions from a spec file and creates the appropriate
    :class:`MergeFieldSpec` items in the merge_def. The left_table, right_table
    and db_context properties of the :class:`MergeDef` must already be set
    before calling this method.
    
    Parameters
    ----------
    filename : str
        Name of the merge spec file.
        
    merge_def : :class:`MergeDef`
        The merge definition object into which the field definitions will be
        read.
        
    Returns
    -------
    None
    """
    messages = []
    db_context = merge_def.db_context
    run_context = db_context.runContext
    if not merge_def.get_actual_tables(messages):
        for message in messages:
            run_context.error(message)
        raise ValueError("Input tables not properly specified in MergeDef")

    reader = SafeExcelReader(run_context,
                             filename,
                             sheet_name=0,
                             get_names=True)
    del merge_def[:]

    # Checklists to confirm that all variables from each input table appear exactly once
    n_occurred_left = {}
    for field in merge_def.left_input_table.iterkeys():
        n_occurred_left[field] = 0
    n_occurred_right = {}
    for field in merge_def.right_input_table.iterkeys():
        n_occurred_right[field] = 0

    # Read the left side of the merge first
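    # Merge keys are collected in three groups as the spec is read: required
    # ("primary"/"required"), optional ("secondary"/"optional"), and fuzzy
    # keys, matching the key_assignment designations parsed below.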
    required_keys = merge_def.required_merge_keys = []
    optional_keys = merge_def.optional_merge_keys = []
    fuzzy_keys = merge_def.fuzzy_merge_keys = []
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
                _extract_field_properties( row )
        if input_name not in merge_def.left_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.left_input_table))
        left_field = merge_def.left_input_table[input_name]
        n_occurred_left[left_field.field_name] += 1
        if field_name != '<drop>':
            if field_name in merge_def:
                raise ValueError(
                    'Attempting to create two merge fields with the same name')
            merge_field = MergeFieldSpec(left_field, None, None)
            merge_field.field_name = field_name
            merge_def.add(merge_field)
            _write_type(merge_field, data_type, data_length)
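            # Map the spec sheet's priority keyword onto the internal priority
            # constants; a "second" priority may later be promoted to
            # PRIORITY_RIGHT_NONMISSING when the right-hand sheet is read.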
            if priority == 'first':
                merge_field.priority_field = PRIORITY_LEFT
            elif priority == 'firstnonmissing':
                merge_field.priority_field = PRIORITY_LEFT_NONMISSING
            elif priority == 'second':
                # Note, this may later get changed to PRIORITY_RIGHT_NONMISSING
                merge_field.priority_field = PRIORITY_RIGHT
            elif priority is None or priority == '':
                merge_field.priority_field = PRIORITY_LEFT_ONLY
            else:
                raise ValueError(
                    "Found priority {}; must be blank or one of \"FIRST\", \"SECOND\", or \"FIRST NON-MISSING\"".format(priority)
                )

        if key_assignment is not None and key_assignment != "":
            key_field = MergeFieldSpec(left_field, None, None)
            key_field.field_name = key_assignment
            match = _KEY_ASSIGNMENT_RE.match(key_assignment)
            if match is None:
                raise ValueError(
                    "Did not know how to interpret key expression {}".format(
                        key_assignment))
            key_type, key_nbr = match.group(1, 2)
            if key_type in ("primary", "required"):
                _add_key(required_keys, key_field, key_nbr, key_assignment)
            elif key_type in ("secondary", "optional"):
                _add_key(optional_keys, key_field, key_nbr, key_assignment)
            elif key_type == "fuzzy":
                _add_fuzzy_key(fuzzy_keys, key_field, key_nbr, key_assignment)
            else:
                raise AssertionError("This error really should not happen")

    # Now read the right side of the merge
    reader.sheetName = 1
    for row in reader.getRows():
        field_name, input_name, priority, key_assignment, data_type, data_length = \
                _extract_field_properties( row )
        if input_name not in merge_def.right_input_table:
            raise ValueError('Did not find column named {} in table {}'.format(
                input_name, merge_def.right_input_table))
        right_field = merge_def.right_input_table[input_name]
        n_occurred_right[right_field.field_name] += 1
        if field_name != '<drop>':
            if field_name not in merge_def:
                merge_field = MergeFieldSpec(None, right_field,
                                             PRIORITY_RIGHT_ONLY)
                merge_field.field_name = field_name
                merge_def.add(merge_field)
                _write_type(merge_field, data_type, data_length)
                if not (priority is None or priority == ''):
                    raise ValueError(
                        "Field exists only on right side of merge: must have a blank priority"
                    )

            else:
                merge_field = merge_def[field_name]
                merge_field.right_field = copy.copy(right_field)

                # Check for compatible type assignment
                if merge_field.basic_type != data_type:
                    raise ValueError(
                        "Incompatible data types for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.basic_type,
                                data_type))
                if merge_field.is_charish and merge_field.data_length != data_length:
                    raise ValueError(
                        "Data lengths differ for field {}: {} on left to {} on right"
                        .format(merge_field, merge_field.data_length,
                                data_length))

                # Check for consistent priority assignment
                if merge_field.priority_field in (  PRIORITY_LEFT, PRIORITY_LEFT_NONMISSING ) and \
                        priority != 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_RIGHT and \
                        priority == 'second':
                    raise ValueError(
                        "Inconsistent priority designations for field {}".
                        format(merge_field))
                if merge_field.priority_field == PRIORITY_LEFT_ONLY:
                    raise ValueError(
                        "Blank priority designation not permitted when field appears on both sides of merge"
                    )
                if priority == 'firstnonmissing':
                    merge_field.priority_field = PRIORITY_RIGHT_NONMISSING

            # Do key assignment
            if key_assignment is not None and key_assignment != "":
                match = _KEY_ASSIGNMENT_RE.match(key_assignment)
                if match is None:
                    raise ValueError(
                        "Did not know how to interpret key expression {}".
                        format(key_assignment))
                key_type, key_nbr = match.group(1, 2)
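                # The spec sheet numbers keys from 1; convert to a zero-based list index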
                key_nbr = int(key_nbr) - 1
                key_field = None
                if key_type in ("primary", "required"):
                    if key_nbr < len(required_keys):
                        key_field = required_keys[key_nbr]
                elif key_type in ("secondary", "optional"):
                    if key_nbr < len(optional_keys):
                        key_field = optional_keys[key_nbr]
                elif key_type == "fuzzy":
                    if key_nbr < len(fuzzy_keys):
                        for key_field in fuzzy_keys[key_nbr]:
                            if key_field.right_field is None:
                                break
                else:
                    raise AssertionError("This error really should not happen")
                if key_field is None:
                    raise ValueError(
                        "Key {} was specified only on right side of merge".
                        format(key_assignment))
                if key_field.right_field is not None:
                    raise ValueError(
                        "Too many keys defined on right side of merge with key designation {}"
                        .format(key_assignment))
                key_field.right_field = right_field

    # Confirm that each input variable was processed exactly once.
    succeed = True
    for k, v in n_occurred_left.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from left input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    for k, v in n_occurred_right.items():
        if v != 1:
            succeed = False
            run_context.error(
                "Each variable from right input table must occur exactly once on spec sheet. {} appeared {} times"
                .format(k, v))
    if not succeed:
        raise ValueError("Missing or duplicate variables on merge spec sheet")
Example 5
class Test(unittest.TestCase):
    def setUp(self):
        self.runContext = SuiteContext('unittest')
        self.db_context = self.runContext.getDBContext(tag='unittest')
        clear_all(self.db_context)
        self.reader = SafeExcelReader(self.runContext)
        self.reader.db_context = self.db_context
        self.testDataDir = self.runContext.tests_safe_dir

    def testXLS(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        self.reader.outputTable = "Data1"
        self.reader.createTable()

        table_spec = self.db_context.getTableSpec('Data1')
        primary_key = table_spec.primary_key
        self.assertEqual(len(primary_key), 1)
        self.assertEqual(primary_key[0].field_name, '[import_order]')

    def testConstructor(self):
        reader = SafeExcelReader(self.runContext,
                                 filename=os.path.join(self.testDataDir,
                                                       _XLS_FILE),
                                 sheet_name='Data1',
                                 db_context=self.runContext.getDBContext(),
                                 output_table='Temp1',
                                 get_names=True,
                                 delimiter=',',
                                 import_order='import_order')
        reader.createTable()

        for name in get_table_names(self.db_context):
            self.assertEqual(
                '[temp1]', name,
                "Found name '{name}' instead of '[temp1]'".format(name=name))

    def testIntoPython(self):
        self.reader.filename = os.path.join(self.testDataDir, _XLS_FILE)
        self.reader.sheetName = "Data1"
        rows = [row for row in self.reader.getRows()]
        self.assertEqual(300, len(rows),
                         'Expected 300 rows, found %d' % len(rows))

    def testCSVIntoDB(self):
        self.reader.filename = os.path.join(self.testDataDir, _CSV_FILE)
        self.reader.outputTable = "CSV1"
        self.reader.scan_all = True
        self.reader.getNames = True
        self.reader.delimiter = "|"
        self.reader.skip = 0
        self.reader.range = (0, 0, 100, 1024)
        self.reader.createTable()
        table_spec = self.db_context.getTableSpec('CSV1')
        for col_name in _CSV_COLUMNS:
            self.assertTrue(col_name in table_spec,
                            "Missing column {}".format(col_name))

        self.assertEqual(n_obs(table_spec), 100,
                         "Wrong number of rows in imported data")

    def test_xlcol(self):
        letters = [self.reader._xlcol(x) for x in [0, 3, 25]]
        self.assertTrue(letters == ['A', 'D', 'Z'],
                        'xlcol - translating numbers to letters - failed')

    def test_xlcolnumber(self):
        numbers = [self.reader._xlcolnumber(x) for x in ['A', 'D', 'Z']]
        self.assertTrue(
            numbers == [0, 3, 25],
            'xlcolnumber - translating letters to numbers - failed')
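
# For reference, a standalone sketch of the zero-based column-letter
# conversion that test_xlcol and test_xlcolnumber exercise. This assumes the
# conventional bijective base-26 algorithm; it is not SafeExcelReader's
# actual implementation:

def _example_xlcol( n ):
    """Translate a zero-based column number to Excel letters: 0 -> 'A', 25 -> 'Z', 26 -> 'AA'."""
    letters = ''
    while True:
        n, r = divmod( n, 26 )
        letters = chr( ord( 'A' ) + r ) + letters
        if n == 0:
            return letters
        # Borrow one so that 'AA' follows 'Z' (bijective base-26)
        n -= 1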