Ejemplo n.º 1
0
    def _initializeEncoders(self, encoderSpec):
        """Build the encoder selected by ``self.encoderType`` into ``self.encoder``.

        Args:
            encoderSpec: dict-like encoder specification. For the scalar
                encoder types the optional ``'minval'`` and ``'maxval'``
                entries are popped from it (the argument is mutated) and
                stored on ``self.minval`` / ``self.maxval``; a missing entry
                is stored as ``None``.

        Raises:
            RuntimeError: if ``self.encoderType`` is not one of
                'adaptiveScalar', 'scalar', 'category', 'date' or 'datetime'.
        """
        # Scalar encoder: both 'adaptiveScalar' and 'scalar' are served by the
        # adaptive scalar encoder.
        if self.encoderType in ['adaptiveScalar', 'scalar']:
            self.minval = encoderSpec.pop('minval', None)
            self.maxval = encoderSpec.pop('maxval', None)
            self.encoder = adaptivescalar.AdaptiveScalarEncoder(
                name='AdaptiveScalarEncoder',
                w=self.w, n=self.n,
                minval=self.minval, maxval=self.maxval,
                periodic=False, forced=True)

        # Category encoder
        elif self.encoderType == 'category':
            self.encoder = sdrcategory.SDRCategoryEncoder(
                name='categoryEncoder', w=self.w, n=self.n)

        # Date encoder
        elif self.encoderType in ['date', 'datetime']:
            self.encoder = date.DateEncoder(name='dateEncoder')

        else:
            # Adjacent string literals are concatenated verbatim, so the
            # separating space has to be part of one of the literals.
            raise RuntimeError(
                'Error in constructing class object. Either encoder type '
                'or dataType must be specified')
Ejemplo n.º 2
0
    def getStats(self, stats):
        """Record, per date sub-encoder, whether this field's values vary.

        After delegating to ``BaseStatsCollector.getStats``, every value in
        ``self.valueList`` is encoded with a date encoder configured at
        maximal resolution and the encodings are OR-ed together. For each
        sub-encoder, ``stats[self.fieldname][<sub-encoder name>]`` is set to
        True when more than one bit is set in that sub-encoder's slice of the
        OR-ed output, and to False otherwise.

        Rationale: a sub-encoding that is constant over the whole dataset
        (e.g. a file holding only Wednesdays makes dayOfWeek constant) is not
        worth including in the permutations.

        Args:
            stats: mapping that is updated in place; ``stats[self.fieldname]``
                must already be a mapping once the base collector has run.
        """
        BaseStatsCollector.getStats(self, stats)

        # Date encoder with the finest resolution for every sub-encoder.
        maximalEncoder = DateEncoder.DateEncoder(
            season=(1, 1),  # width=366, resolution=1day
            dayOfWeek=(1, 1),  # width=7, resolution=1day
            timeOfDay=(1, 1.0 / 60),  # width=1440, resolution=1min
            weekend=1,  # width=2, binary encoding
            holiday=1,  # width=2, binary encoding
        )
        totalWidth = maximalEncoder.getWidth()

        # Accumulate the logical OR of all encodings in place.
        orBits = numpy.zeros(totalWidth, dtype=numpy.uint8)
        for sample in self.valueList:
            numpy.logical_or(orBits, maximalEncoder.encode(sample), orBits)

        # Each description entry is (name, start offset); a sub-encoder's
        # slice ends where the next one starts, or at the full width for the
        # last one.
        description = maximalEncoder.getDescription()
        lastPosition = len(description) - 1
        for position, (name, start) in enumerate(description):
            if position == lastPosition:
                stop = totalWidth
            else:
                stop = description[position + 1][1]
            hasVariation = orBits[start:stop].sum() > 1
            stats[self.fieldname][name] = hasVariation

        # The decoded value is not used below; the call is retained so the
        # method performs the same operations as before.
        decodedInput = maximalEncoder.decode(orBits)[0]

        if VERBOSITY > 2:
            print("--")
            print("Sub-encoders:")
            for name, _ in description:
                print("%s:%s" % (name, stats[self.fieldname][name]))