def _initializeEncoders(self, encoderSpec): """ Initialize the encoders""" #Initializing scalar encoder if self.encoderType in ['adaptiveScalar', 'scalar']: if 'minval' in encoderSpec: self.minval = encoderSpec.pop('minval') else: self.minval = None if 'maxval' in encoderSpec: self.maxval = encoderSpec.pop('maxval') else: self.maxval = None self.encoder=adaptivescalar.AdaptiveScalarEncoder(name='AdaptiveScalarEncoder',\ w=self.w, n=self.n, minval=self.minval, maxval=self.maxval, periodic=False, forced=True) #Initializing category encoder elif self.encoderType == 'category': self.encoder=sdrcategory.SDRCategoryEncoder(name='categoryEncoder', \ w=self.w, n=self.n) #Initializing date encoder elif self.encoderType in ['date', 'datetime']: self.encoder = date.DateEncoder(name='dateEncoder') else: raise RuntimeError( 'Error in constructing class object. Either encoder type' 'or dataType must be specified')
def getStats(self, stats):
    """Record, for each date sub-encoder, whether this field's values vary.

    After delegating to ``BaseStatsCollector.getStats``, every value in
    ``self.valueList`` is encoded with a ``DateEncoder`` that enables all of
    its sub-encoders, and the encodings are OR-ed together. For each
    sub-encoder, ``stats[self.fieldname][<sub-encoder name>]`` is set to a
    truth value that is true when more than one bit of that sub-encoder's
    slice was ever set.

    :param stats: dict of per-field statistics, updated in place. The entry
        ``stats[self.fieldname]`` must already exist when this code writes to
        it (presumably created by the base-class call -- verify).
    """
    BaseStatsCollector.getStats(self, stats)

    # We include sub-encoders for a datetime field only if there is variation
    # in the encodings for that particular sub-encoding. For example,
    # gym_melbourne_wed_train.csv has data only on Wednesdays, so it doesn't
    # make sense to include dayOfWeek in the permutations because it is
    # constant across the entire dataset.
    #
    # We check for variation in sub-encodings by passing the timestamp field
    # through the maximal sub-encoder and checking for variation in the
    # post-encoding values.

    # Set up a datetime encoder with maximal resolution for each sub-encoder.
    encoder = DateEncoder.DateEncoder(
        season=(1, 1),              # width=366, resolution=1day
        dayOfWeek=(1, 1),           # width=7, resolution=1day
        timeOfDay=(1, 1.0 / 60),    # width=1440, resolution=1min
        weekend=1,                  # width=2, binary encoding
        holiday=1,                  # width=2, binary encoding
    )

    # Accumulate the union (bitwise OR) of all encoder outputs in place.
    totalOrEncoderOutput = numpy.zeros(encoder.getWidth(), dtype=numpy.uint8)
    for value in self.valueList:
        numpy.logical_or(totalOrEncoderOutput, encoder.encode(value),
                         totalOrEncoderOutput)

    # Each description entry is a (name, offset) pair; a sub-encoder's slice
    # runs from its own offset up to the next entry's offset, or to the full
    # encoder width for the last entry.
    encoderDescription = encoder.getDescription()
    beginOffsets = [offset for _, offset in encoderDescription]
    endOffsets = beginOffsets[1:] + [encoder.getWidth()]

    for (subEncoderName, beginIdx), endIdx in zip(encoderDescription,
                                                  endOffsets):
        # More than one active bit in the slice means the values differed.
        stats[self.fieldname][subEncoderName] = (
            totalOrEncoderOutput[beginIdx:endIdx].sum() > 1)

    if VERBOSITY > 2:
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        print("--")
        print("Sub-encoders:")
        for subEncoderName, _ in encoderDescription:
            print("%s:%s" % (subEncoderName,
                             stats[self.fieldname][subEncoderName]))