Example #1
0
class InputReaderTest(unittest.TestCase):
    ''' Unit tests for InputReader: CSV reading and file path handling '''

    def setUp( self ):
        ''' Construct our InputReader object, pass it the test csv file '''
        self.mInputReader = InputReader( testFile )

    def test_read( self ):
        ''' Test the csv read functionality against known values '''
        self.mInputReader.readFile()
        mRawData = self.mInputReader.getRawData()
        # Every cell is compared as a string, header row first
        self.assertEqual( mRawData, [['InputReader', 'Test', 'CSV'],
                                     ['1', '44', '-4.3'],
                                     ['234', '-45', '0.45']] )

    def test_pathSet( self ):
        ''' Test set file path and FileNotFoundError exception'''
        # Set a bogus file name
        self.mInputReader.setFilePath( '../../res/NonExist.csv' )
        # readFile() must be invoked *inside* the assertRaises context so
        # the exception is captured by the assertion; calling it while
        # building the argument list would raise before assertRaises runs.
        with self.assertRaises( FileNotFoundError ):
            self.mInputReader.readFile()
Example #2
0
class FeatureExtractor( metaclass=ABCMeta ):
    ''' 
    Abstract base class for extracting and generating features from input 
    resource file.  Implementation classes must implement extractFeatures()
    appropriately for the given input source.

    The first row of the raw data is kept as the feature (column) names and
    the remaining rows are kept as a numpy array of training samples.
    '''

    def __init__( self, mInputReader , filterPath='../res/FeatureFilter' ):
        '''
        Constructor - arguments passed from main
        @param mInputReader: InputReader object for setting raw data
        @param filterPath: location of the feature filter CSV that is read
                           by applyFeatureFilter()
        '''
        # Feature dump and filter path
        self.outCSVPath = '../../tmp/featureDump.csv'
        self.filterCSVPath = filterPath

        # Get raw data from the passed InputReader
        mInputReader.readFile()
        self.rawData = mInputReader.getRawData()

        # Initialize feature set and training data from raw data.
        # Note: self.features is the same list object as self.rawData[0],
        # so removing a feature in applyFeatureFilter() also alters rawData.
        self.features = self.rawData[0]
        self.trainingData = np.array( self.rawData[1:] )
        
        # Construct the InputReader used for feature filtering
        self.filterReader = InputReader( self.filterCSVPath )

        # Initialize number of samples removed
        self.nRmvSamples = 0


    def setOutCSVPath( self , fPath ):
        '''@param fPath: relative location and name of feature dump CSV'''
        self.outCSVPath = fPath


    def setFilterPath( self , fPath ):
        '''@param fPath: relative location and name of feature filter CSV'''
        self.filterCSVPath = fPath
        # The filter reader was bound to the previous path at construction;
        # rebuild it so applyFeatureFilter() actually reads the new file.
        self.filterReader = InputReader( fPath )


    def getFeatures( self ):
        '''@return: list of feature names currently held'''
        return self.features


    def getTrainingData( self ):
        '''@return: numpy array of training samples currently held'''
        return self.trainingData


    def setTrainingData( self, data ):
        '''
        Replace the training data.
        @param data: numpy ndarray of samples (checked with an assertion)
        '''
        assert( isinstance( data, np.ndarray ) )
        self.trainingData = data

    
    def getSampleCnt( self ):
        '''@return: number of samples (length of the training data)'''
        return len( self.trainingData )


    def getRmvSampleCnt( self ):
        '''@return: value of the removed-samples counter (starts at 0)'''
        return self.nRmvSamples


    def listIdx( self, feature ):
        '''
        Return the list index of a given feature
        @param feature: training feature
        @return index: index of passed feature
        @raise ValueError: if the feature is not in the feature list
        '''
        return self.features.index( feature )
        

    def applyFeatureFilter( self ):
        ''' 
        Reads the filter resource file and accordingly removes the feature
        from each sample.  Only the first row of the filter file is used;
        an empty filter file leaves the features untouched.
        '''
        # Read out the resource content
        self.filterReader.readFile()

        # Stash the results to a local list
        mFilterRows = self.filterReader.getRawData()
        if len( mFilterRows ) == 0:
            # Nothing listed in the filter file, so nothing to remove
            return
        mFilterList = mFilterRows[0]
        
        # Use our list index method to find appropriate column in feature 
        # list to remove
        for feature in mFilterList:
            try:
                idx = self.listIdx( feature )
                del self.features[idx]
                # Drop the matching column (axis 1) from every sample
                self.trainingData = np.delete( self.trainingData, idx, 1 )
            except ValueError:
                print( 'Unable to remove feature %s!' % feature )
    

    def writeFeaturesToCSV( self ):
        ''' 
        Dump the transformed data out to CSV for external eval
        Note: This shouldn't be called w/o extracting features from a 
        derived class first.
        '''
        # The context manager releases file i/o even if a write fails
        with open( self.outCSVPath, 'w', newline='' ) as mDumpFile:
            mCSVWriter = csv.writer( mDumpFile, delimiter=',' )

            # First write the features to the first row of the dump file
            mCSVWriter.writerow( self.features )

            # Then, dump all training data writing by row/record
            mCSVWriter.writerows( self.trainingData )


    @abstractmethod
    def extractFeatures( self ):
        ''' This method is to be implemented by subclasses'''
        pass


    def __del__( self ):
        '''No Destructor implementation'''
        pass