class LogEncoder(Encoder): """ This class wraps the ScalarEncoder class. A Log encoder represents a floating point value on a logarithmic scale. valueToEncode = log10(input) w -- number of bits to set in output minval -- minimum input value. must be greater than 0. Lower values are reset to this value maxval -- maximum input value (input is strictly less if periodic == True) periodic -- If true, then the input value "wraps around" such that minval = maxval For a periodic value, the input must be strictly less than maxval, otherwise maxval is a true upper bound. Exactly one of n, radius, resolution must be set. "0" is a special value that means "not set". n -- number of bits in the representation (must be > w) radius -- inputs separated by more than this distance in log space will have non-overlapping representations resolution -- The minimum change in scaled value needed to produce a change in encoding. This should be specified in log space. For example, the scaled values 10 and 11 will be distinguishable in the output. In terms of the original input values, this means 10^1 (1) and 10^1.1 (1.25) will be distinguishable. name -- an optional string which will become part of the description verbosity -- level of debugging output you want the encoder to provide. clipInput -- if true, non-periodic inputs smaller than minval or greater than maxval will be clipped to minval/maxval forced -- (default False), if True, skip some safety checks """ def __init__(self, w=5, minval=1e-07, maxval=10000, periodic=False, n=0, radius=0, resolution=0, name="log", verbosity=0, clipInput=True, forced=False): # Lower bound for log encoding near machine precision limit lowLimit = 1e-07 # Limit minval as log10(0) is undefined. 
if minval < lowLimit: minval = lowLimit # Check that minval is still lower than maxval if not minval < maxval: raise ValueError( "Max val must be larger than min val or the lower limit " "for this encoder %.7f" % lowLimit) self.encoders = None self.verbosity = verbosity # Scale values for calculations within the class self.minScaledValue = math.log10(minval) self.maxScaledValue = math.log10(maxval) if not self.maxScaledValue > self.minScaledValue: raise ValueError( "Max val must be larger, in log space, than min val.") self.clipInput = clipInput self.minval = minval self.maxval = maxval self.encoder = ScalarEncoder(w=w, minval=self.minScaledValue, maxval=self.maxScaledValue, periodic=False, n=n, radius=radius, resolution=resolution, verbosity=self.verbosity, clipInput=self.clipInput, forced=forced) self.width = self.encoder.getWidth() self.description = [(name, 0)] self.name = name # This list is created by getBucketValues() the first time it is called, # and re-created whenever our buckets would be re-arranged. 
self._bucketValues = None ############################################################################ def getWidth(self): return self.width ############################################################################ def getDescription(self): return self.description ############################################################################ def getDecoderOutputFieldTypes(self): """ Encoder class virtual method override """ return (FieldMetaType.float, ) ############################################################################ def _getScaledValue(self, inpt): """ Convert the input, which is in normal space, into log space """ if inpt == SENTINEL_VALUE_FOR_MISSING_DATA: return None else: val = inpt if val < self.minval: val = self.minval elif val > self.maxval: val = self.maxval scaledVal = math.log10(val) return scaledVal ############################################################################ def getBucketIndices(self, inpt): """ See the function description in base.py """ # Get the scaled value scaledVal = self._getScaledValue(inpt) if scaledVal is None: return [None] else: return self.encoder.getBucketIndices(scaledVal) ############################################################################ def encodeIntoArray(self, inpt, output): """ See the function description in base.py """ # Get the scaled value scaledVal = self._getScaledValue(inpt) if scaledVal is None: output[0:] = 0 else: self.encoder.encodeIntoArray(scaledVal, output) if self.verbosity >= 2: print "input:", inpt, "scaledVal:", scaledVal, "output:", output print "decoded:", self.decodedToStr(self.decode(output)) ############################################################################ def decode(self, encoded, parentFieldName=''): """ See the function description in base.py """ # Get the scalar values from the underlying scalar encoder (fieldsDict, fieldNames) = self.encoder.decode(encoded) if len(fieldsDict) == 0: return (fieldsDict, fieldNames) # Expect only 1 field assert 
(len(fieldsDict) == 1) # Convert each range into normal space (inRanges, inDesc) = fieldsDict.values()[0] outRanges = [] for (minV, maxV) in inRanges: outRanges.append((math.pow(10, minV), math.pow(10, maxV))) # Generate a text description of the ranges desc = "" numRanges = len(outRanges) for i in xrange(numRanges): if outRanges[i][0] != outRanges[i][1]: desc += "%.2f-%.2f" % (outRanges[i][0], outRanges[i][1]) else: desc += "%.2f" % (outRanges[i][0]) if i < numRanges - 1: desc += ", " # Return result if parentFieldName != '': fieldName = "%s.%s" % (parentFieldName, self.name) else: fieldName = self.name return ({fieldName: (outRanges, desc)}, [fieldName]) ############################################################################ def getBucketValues(self): """ See the function description in base.py """ # Need to re-create? if self._bucketValues is None: scaledValues = self.encoder.getBucketValues() self._bucketValues = [] for scaledValue in scaledValues: value = math.pow(10, scaledValue) self._bucketValues.append(value) return self._bucketValues ############################################################################ def getBucketInfo(self, buckets): """ See the function description in base.py """ scaledResult = self.encoder.getBucketInfo(buckets)[0] scaledValue = scaledResult.value value = math.pow(10, scaledValue) return [ EncoderResult(value=value, scalar=value, encoding=scaledResult.encoding) ] ############################################################################ def topDownCompute(self, encoded): """ See the function description in base.py """ scaledResult = self.encoder.topDownCompute(encoded)[0] scaledValue = scaledResult.value value = math.pow(10, scaledValue) return EncoderResult(value=value, scalar=value, encoding=scaledResult.encoding) ############################################################################ def closenessScores(self, expValues, actValues, fractional=True): """ See the function description in base.py """ # Compute the 
percent error in log space if expValues[0] > 0: expValue = math.log10(expValues[0]) else: expValue = self.minScaledValue if actValues[0] > 0: actValue = math.log10(actValues[0]) else: actValue = self.minScaledValue if fractional: err = abs(expValue - actValue) pctErr = err / (self.maxScaledValue - self.minScaledValue) pctErr = min(1.0, pctErr) closeness = 1.0 - pctErr else: err = abs(expValue - actValue) closeness = err #print "log::", "expValue:", expValues[0], "actValue:", actValues[0], \ # "closeness", closeness #import pdb; pdb.set_trace() return numpy.array([closeness]) @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.verbosity = proto.verbosity encoder.minScaledValue = proto.minScaledValue encoder.maxScaledValue = proto.maxScaledValue encoder.clipInput = proto.clipInput encoder.minval = proto.minval encoder.maxval = proto.maxval encoder.encoder = ScalarEncoder.read(proto.encoder) encoder.name = proto.name encoder.width = encoder.encoder.getWidth() encoder.description = [(encoder.name, 0)] encoder._bucketValues = None return encoder def write(self, proto): proto.verbosity = self.verbosity proto.minScaledValue = self.minScaledValue proto.maxScaledValue = self.maxScaledValue proto.clipInput = self.clipInput proto.minval = self.minval proto.maxval = self.maxval self.encoder.write(proto.encoder) proto.name = self.name
class CategoryEncoder(Encoder):
  """
  Encodes a list of discrete categories (described by strings), that aren't
  related to each other, so we never emit a mixture of categories.

  The value of zero is reserved for "unknown category"

  Internally we use a :class:`.ScalarEncoder` with a radius of 1, but since we
  only encode integers, we never get mixture outputs.

  The :class:`.SDRCategoryEncoder` uses a different method to encode categories.

  :param categoryList: list of discrete string categories
  :param forced: if True, skip checks for parameters' settings; see
                 :class:`.ScalarEncoder` for details. (default False)
  """

  def __init__(self, w, categoryList, name="category", verbosity=0,
               forced=False):
    # Leaf encoder: no sub-encoders.
    self.encoders = None
    self.verbosity = verbosity

    # number of categories includes "unknown"
    self.ncategories = len(categoryList) + 1

    # Bidirectional maps between category strings and their integer indices;
    # index 0 is reserved for the UNKNOWN category.
    self.categoryToIndex = dict()
    self.indexToCategory = dict()
    self.indexToCategory[0] = UNKNOWN
    for i in xrange(len(categoryList)):
      self.categoryToIndex[categoryList[i]] = i+1
      self.indexToCategory[i+1] = categoryList[i]

    # radius=1 means adjacent integer indices share no output bits, so each
    # category gets a disjoint group of w bits.
    self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1,
                                 radius=1, periodic=False, forced=forced)
    self.width = w * self.ncategories
    assert self.encoder.getWidth() == self.width

    self.description = [(name, 0)]
    self.name = name

    # These are used to support the topDownCompute method
    self._topDownMappingM = None

    # This gets filled in by getBucketValues
    self._bucketValues = None


  def getDecoderOutputFieldTypes(self):
    """ [Encoder class virtual method override] """
    # TODO: change back to string meta-type after the decoding logic is fixed
    # to output strings instead of internal index values.
    #return (FieldMetaType.string,)
    return (FieldMetaType.integer,)


  def getWidth(self):
    # Total number of bits in the output representation.
    return self.width


  def getDescription(self):
    return self.description


  def getScalars(self, input):
    """ See method description in base.py """
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return numpy.array([None])
    else:
      # Unrecognized categories map to index 0 ("unknown").
      return numpy.array([self.categoryToIndex.get(input, 0)])


  def getBucketIndices(self, input):
    """ See method description in base.py """

    # Get the bucket index from the underlying scalar encoder
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      return [None]
    else:
      return self.encoder.getBucketIndices(self.categoryToIndex.get(input, 0))


  def encodeIntoArray(self, input, output):
    # Missing data encodes to all zeros; if not found, we encode category 0.
    if input == SENTINEL_VALUE_FOR_MISSING_DATA:
      output[0:] = 0
      val = "<missing>"
    else:
      val = self.categoryToIndex.get(input, 0)
      self.encoder.encodeIntoArray(val, output)

    if self.verbosity >= 2:
      print "input:", input, "va:", val, "output:", output
      print "decoded:", self.decodedToStr(self.decode(output))


  def decode(self, encoded, parentFieldName=''):
    """ See the function description in base.py """

    # Get the scalar values from the underlying scalar encoder
    (fieldsDict, fieldNames) = self.encoder.decode(encoded)
    if len(fieldsDict) == 0:
      return (fieldsDict, fieldNames)

    # Expect only 1 field
    assert(len(fieldsDict) == 1)

    # Get the list of categories the scalar values correspond to and
    # generate the description from the category name(s).
    (inRanges, inDesc) = fieldsDict.values()[0]
    outRanges = []
    desc = ""
    for (minV, maxV) in inRanges:
      # Round the scalar range endpoints to integer category indices and
      # list every category name the range covers.
      minV = int(round(minV))
      maxV = int(round(maxV))
      outRanges.append((minV, maxV))
      while minV <= maxV:
        if len(desc) > 0:
          desc += ", "
        desc += self.indexToCategory[minV]
        minV += 1

    # Return result
    if parentFieldName != '':
      fieldName = "%s.%s" % (parentFieldName, self.name)
    else:
      fieldName = self.name
    return ({fieldName: (outRanges, desc)}, [fieldName])


  def closenessScores(self, expValues, actValues, fractional=True,):
    """ See the function description in base.py

    kwargs will have the keyword "fractional", which is ignored by this encoder
    """

    # Categories either match exactly or not at all.
    expValue = expValues[0]
    actValue = actValues[0]

    if expValue == actValue:
      closeness = 1.0
    else:
      closeness = 0.0

    # In non-fractional mode the score is inverted (0 == exact match).
    if not fractional:
      closeness = 1.0 - closeness

    return numpy.array([closeness])


  def getBucketValues(self):
    """ See the function description in base.py """

    # Lazily build and cache the list of category values, one per bucket.
    if self._bucketValues is None:
      numBuckets = len(self.encoder.getBucketValues())
      self._bucketValues = []
      for bucketIndex in range(numBuckets):
        self._bucketValues.append(self.getBucketInfo([bucketIndex])[0].value)

    return self._bucketValues


  def getBucketInfo(self, buckets):
    """ See the function description in base.py """

    # For the category encoder, the bucket index is the category index
    bucketInfo = self.encoder.getBucketInfo(buckets)[0]

    categoryIndex = int(round(bucketInfo.value))
    category = self.indexToCategory[categoryIndex]

    return [EncoderResult(value=category, scalar=categoryIndex,
                          encoding=bucketInfo.encoding)]


  def topDownCompute(self, encoded):
    """ See the function description in base.py """

    encoderResult = self.encoder.topDownCompute(encoded)[0]
    value = encoderResult.value
    categoryIndex = int(round(value))
    category = self.indexToCategory[categoryIndex]

    return EncoderResult(value=category, scalar=categoryIndex,
                         encoding=encoderResult.encoding)


  @classmethod
  def getSchema(cls):
    # Capnp schema used for read()/write() serialization.
    return CategoryEncoderProto


  @classmethod
  def read(cls, proto):
    # Rebuild the instance without running __init__ (which requires the
    # original categoryList); restore every attribute __init__ would set.
    encoder = object.__new__(cls)
    encoder.verbosity = proto.verbosity
    encoder.encoder = ScalarEncoder.read(proto.encoder)
    encoder.width = proto.width
    encoder.description = [(proto.name, 0)]
    encoder.name = proto.name
    encoder.indexToCategory = {x.index: x.category
                               for x in proto.indexToCategory}
    # Rebuild the inverse map, excluding the reserved UNKNOWN entry.
    encoder.categoryToIndex = {category: index
                               for index, category
                               in encoder.indexToCategory.items()
                               if category != UNKNOWN}
    encoder._topDownMappingM = None
    encoder.ncategories = len(proto.indexToCategory)
    encoder._bucketValues = None
    encoder.encoders = None
    return encoder


  def write(self, proto):
    # Persist enough state for read() to reconstruct the encoder.
    proto.width = self.width
    proto.indexToCategory = [
      {"index": index, "category": category}
      for index, category in self.indexToCategory.items()
    ]
    proto.name = self.name
    proto.verbosity = self.verbosity
    self.encoder.write(proto.encoder)
class CategoryEncoder(Encoder): """Encodes a list of discrete categories (described by strings), that aren't related to each other, so we never emit a mixture of categories. The value of zero is reserved for "unknown category" Internally we use a ScalarEncoder with a radius of 1, but since we only encode integers, we never get mixture outputs. The SDRCategoryEncoder uses a different method to encode categories""" def __init__(self, w, categoryList, name="category", verbosity=0, forced=False): """params: forced (default False) : if True, skip checks for parameters' settings; see encoders/scalar.py for details """ self.encoders = None self.verbosity = verbosity # number of categories includes "unknown" self.ncategories = len(categoryList) + 1 self.categoryToIndex = dict() self.indexToCategory = dict() self.indexToCategory[0] = UNKNOWN for i in xrange(len(categoryList)): self.categoryToIndex[categoryList[i]] = i + 1 self.indexToCategory[i + 1] = categoryList[i] self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1, radius=1, periodic=False, forced=forced) self.width = w * self.ncategories assert self.encoder.getWidth() == self.width self.description = [(name, 0)] self.name = name # These are used to support the topDownCompute method self._topDownMappingM = None # This gets filled in by getBucketValues self._bucketValues = None def getDecoderOutputFieldTypes(self): """ [Encoder class virtual method override] """ # TODO: change back to string meta-type after the decoding logic is fixed # to output strings instead of internal index values. 
#return (FieldMetaType.string,) return (FieldMetaType.integer, ) def getWidth(self): return self.width def getDescription(self): return self.description def getScalars(self, input): """ See method description in base.py """ if input == SENTINEL_VALUE_FOR_MISSING_DATA: return numpy.array([None]) else: return numpy.array([self.categoryToIndex.get(input, 0)]) def getBucketIndices(self, input): """ See method description in base.py """ # Get the bucket index from the underlying scalar encoder if input == SENTINEL_VALUE_FOR_MISSING_DATA: return [None] else: return self.encoder.getBucketIndices( self.categoryToIndex.get(input, 0)) def encodeIntoArray(self, input, output): # if not found, we encode category 0 if input == SENTINEL_VALUE_FOR_MISSING_DATA: output[0:] = 0 val = "<missing>" else: val = self.categoryToIndex.get(input, 0) self.encoder.encodeIntoArray(val, output) if self.verbosity >= 2: print "input:", input, "va:", val, "output:", output print "decoded:", self.decodedToStr(self.decode(output)) def decode(self, encoded, parentFieldName=''): """ See the function description in base.py """ # Get the scalar values from the underlying scalar encoder (fieldsDict, fieldNames) = self.encoder.decode(encoded) if len(fieldsDict) == 0: return (fieldsDict, fieldNames) # Expect only 1 field assert (len(fieldsDict) == 1) # Get the list of categories the scalar values correspond to and # generate the description from the category name(s). 
(inRanges, inDesc) = fieldsDict.values()[0] outRanges = [] desc = "" for (minV, maxV) in inRanges: minV = int(round(minV)) maxV = int(round(maxV)) outRanges.append((minV, maxV)) while minV <= maxV: if len(desc) > 0: desc += ", " desc += self.indexToCategory[minV] minV += 1 # Return result if parentFieldName != '': fieldName = "%s.%s" % (parentFieldName, self.name) else: fieldName = self.name return ({fieldName: (outRanges, desc)}, [fieldName]) def closenessScores( self, expValues, actValues, fractional=True, ): """ See the function description in base.py kwargs will have the keyword "fractional", which is ignored by this encoder """ expValue = expValues[0] actValue = actValues[0] if expValue == actValue: closeness = 1.0 else: closeness = 0.0 if not fractional: closeness = 1.0 - closeness return numpy.array([closeness]) def getBucketValues(self): """ See the function description in base.py """ if self._bucketValues is None: numBuckets = len(self.encoder.getBucketValues()) self._bucketValues = [] for bucketIndex in range(numBuckets): self._bucketValues.append( self.getBucketInfo([bucketIndex])[0].value) return self._bucketValues def getBucketInfo(self, buckets): """ See the function description in base.py """ # For the category encoder, the bucket index is the category index bucketInfo = self.encoder.getBucketInfo(buckets)[0] categoryIndex = int(round(bucketInfo.value)) category = self.indexToCategory[categoryIndex] return [ EncoderResult(value=category, scalar=categoryIndex, encoding=bucketInfo.encoding) ] def topDownCompute(self, encoded): """ See the function description in base.py """ encoderResult = self.encoder.topDownCompute(encoded)[0] value = encoderResult.value categoryIndex = int(round(value)) category = self.indexToCategory[categoryIndex] return EncoderResult(value=category, scalar=categoryIndex, encoding=encoderResult.encoding) @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.verbosity = proto.verbosity encoder.encoder = 
ScalarEncoder.read(proto.encoder) encoder.width = proto.width encoder.description = [(proto.name, 0)] encoder.name = proto.name encoder.indexToCategory = { x.index: x.category for x in proto.indexToCategory } encoder.categoryToIndex = { category: index for index, category in encoder.indexToCategory.items() if category != UNKNOWN } encoder._topDownMappingM = None encoder._bucketValues = None return encoder def write(self, proto): proto.width = self.width proto.indexToCategory = [{ "index": index, "category": category } for index, category in self.indexToCategory.items()] proto.name = self.name proto.verbosity = self.verbosity self.encoder.write(proto.encoder)
class LogEncoder(Encoder):
  """
  This class wraps the ScalarEncoder class.

  A Log encoder represents a floating point value on a logarithmic scale.

  valueToEncode = log10(input)

  w -- number of bits to set in output
  minval -- minimum input value. must be greater than 0. Lower values are
          reset to this value
  maxval -- maximum input value (input is strictly less if periodic == True)
  periodic -- If true, then the input value "wraps around" such that minval =
          maxval For a periodic value, the input must be strictly less than
          maxval, otherwise maxval is a true upper bound.

  Exactly one of n, radius, resolution must be set. "0" is a special
  value that means "not set".
  n -- number of bits in the representation (must be > w)
  radius -- inputs separated by more than this distance in log space will have
          non-overlapping representations
  resolution -- The minimum change in scaled value needed to produce a change
          in encoding. This should be specified in log space. For
          example, the scaled values 10 and 11 will be distinguishable in
          the output. In terms of the original input values, this means 10^1
          (1) and 10^1.1 (1.25) will be distinguishable.
  name -- an optional string which will become part of the description
  verbosity -- level of debugging output you want the encoder to provide.
  clipInput -- if true, non-periodic inputs smaller than minval or greater
          than maxval will be clipped to minval/maxval
  forced -- (default False), if True, skip some safety checks
  """

  def __init__(
      self,
      w=5,
      minval=1e-07,
      maxval=10000,
      periodic=False,
      n=0,
      radius=0,
      resolution=0,
      name="log",
      verbosity=0,
      clipInput=True,
      forced=False,
  ):
    # Lower bound for log encoding near machine precision limit
    lowLimit = 1e-07

    # Limit minval as log10(0) is undefined.
    if minval < lowLimit:
      minval = lowLimit

    # Check that minval is still lower than maxval
    if not minval < maxval:
      raise ValueError(
          "Max val must be larger than min val or the lower limit "
          "for this encoder %.7f" % lowLimit
      )

    # Leaf encoder: no sub-encoders.
    self.encoders = None
    self.verbosity = verbosity

    # Scale values for calculations within the class: all internal work is
    # delegated to a ScalarEncoder operating in log10 space.
    self.minScaledValue = math.log10(minval)
    self.maxScaledValue = math.log10(maxval)

    if not self.maxScaledValue > self.minScaledValue:
      raise ValueError("Max val must be larger, in log space, than min val.")

    self.clipInput = clipInput
    self.minval = minval
    self.maxval = maxval

    self.encoder = ScalarEncoder(
        w=w,
        minval=self.minScaledValue,
        maxval=self.maxScaledValue,
        periodic=False,
        n=n,
        radius=radius,
        resolution=resolution,
        verbosity=self.verbosity,
        clipInput=self.clipInput,
        forced=forced,
    )
    self.width = self.encoder.getWidth()
    self.description = [(name, 0)]
    self.name = name

    # This list is created by getBucketValues() the first time it is called,
    # and re-created whenever our buckets would be re-arranged.
    self._bucketValues = None


  def getWidth(self):
    # Total number of bits in the output representation.
    return self.width


  def getDescription(self):
    return self.description


  def getDecoderOutputFieldTypes(self):
    """ Encoder class virtual method override """
    return (FieldMetaType.float,)


  def _getScaledValue(self, inpt):
    """
    Convert the input, which is in normal space, into log space
    """
    # Missing data propagates as None; otherwise the input is clipped to
    # [minval, maxval] before taking log10.
    if inpt == SENTINEL_VALUE_FOR_MISSING_DATA:
      return None
    else:
      val = inpt
      if val < self.minval:
        val = self.minval
      elif val > self.maxval:
        val = self.maxval

      scaledVal = math.log10(val)
      return scaledVal


  def getBucketIndices(self, inpt):
    """ See the function description in base.py """

    # Get the scaled value and delegate to the scalar encoder.
    scaledVal = self._getScaledValue(inpt)

    if scaledVal is None:
      return [None]
    else:
      return self.encoder.getBucketIndices(scaledVal)


  def encodeIntoArray(self, inpt, output):
    """ See the function description in base.py """

    # Get the scaled value; missing data encodes to all zeros.
    scaledVal = self._getScaledValue(inpt)

    if scaledVal is None:
      output[0:] = 0
    else:
      self.encoder.encodeIntoArray(scaledVal, output)

      if self.verbosity >= 2:
        print "input:", inpt, "scaledVal:", scaledVal, "output:", output
        print "decoded:", self.decodedToStr(self.decode(output))


  def decode(self, encoded, parentFieldName=""):
    """ See the function description in base.py """

    # Get the scalar values from the underlying scalar encoder
    (fieldsDict, fieldNames) = self.encoder.decode(encoded)
    if len(fieldsDict) == 0:
      return (fieldsDict, fieldNames)

    # Expect only 1 field
    assert len(fieldsDict) == 1

    # Convert each range into normal space
    (inRanges, inDesc) = fieldsDict.values()[0]
    outRanges = []
    for (minV, maxV) in inRanges:
      outRanges.append((math.pow(10, minV), math.pow(10, maxV)))

    # Generate a text description of the ranges
    desc = ""
    numRanges = len(outRanges)
    for i in xrange(numRanges):
      if outRanges[i][0] != outRanges[i][1]:
        desc += "%.2f-%.2f" % (outRanges[i][0], outRanges[i][1])
      else:
        desc += "%.2f" % (outRanges[i][0])
      if i < numRanges - 1:
        desc += ", "

    # Return result
    if parentFieldName != "":
      fieldName = "%s.%s" % (parentFieldName, self.name)
    else:
      fieldName = self.name
    return ({fieldName: (outRanges, desc)}, [fieldName])


  def getBucketValues(self):
    """ See the function description in base.py """

    # Need to re-create? Cached after first call.
    if self._bucketValues is None:
      scaledValues = self.encoder.getBucketValues()
      self._bucketValues = []
      for scaledValue in scaledValues:
        value = math.pow(10, scaledValue)
        self._bucketValues.append(value)

    return self._bucketValues


  def getBucketInfo(self, buckets):
    """ See the function description in base.py """

    scaledResult = self.encoder.getBucketInfo(buckets)[0]
    scaledValue = scaledResult.value
    # Convert back from log space into normal space.
    value = math.pow(10, scaledValue)

    return [EncoderResult(value=value, scalar=value, encoding=scaledResult.encoding)]


  def topDownCompute(self, encoded):
    """ See the function description in base.py """

    scaledResult = self.encoder.topDownCompute(encoded)[0]
    scaledValue = scaledResult.value
    # Convert back from log space into normal space.
    value = math.pow(10, scaledValue)

    return EncoderResult(value=value, scalar=value, encoding=scaledResult.encoding)


  def closenessScores(self, expValues, actValues, fractional=True):
    """ See the function description in base.py """

    # Compute the percent error in log space; non-positive values fall back
    # to the lowest representable scaled value.
    if expValues[0] > 0:
      expValue = math.log10(expValues[0])
    else:
      expValue = self.minScaledValue

    if actValues[0] > 0:
      actValue = math.log10(actValues[0])
    else:
      actValue = self.minScaledValue

    if fractional:
      err = abs(expValue - actValue)
      pctErr = err / (self.maxScaledValue - self.minScaledValue)
      pctErr = min(1.0, pctErr)
      closeness = 1.0 - pctErr
    else:
      # Non-fractional mode returns the raw absolute log-space error.
      err = abs(expValue - actValue)
      closeness = err

    return numpy.array([closeness])
class CategoryEncoder(Encoder): """Encodes a list of discrete categories (described by strings), that aren't related to each other, so we never emit a mixture of categories. The value of zero is reserved for "unknown category" Internally we use a ScalarEncoder with a radius of 1, but since we only encode integers, we never get mixture outputs. The SDRCategoryEncoder uses a different method to encode categories""" def __init__(self, w, categoryList, name="category", verbosity=0, forced=False): """params: forced (default False) : if True, skip checks for parameters' settings; see encoders/scalar.py for details """ self.encoders = None self.verbosity = verbosity # number of categories includes "unknown" self.ncategories = len(categoryList) + 1 self.categoryToIndex = dict() # check_later: what is the purpose of categoryToIndex and indexToCategory? self.indexToCategory = dict() self.indexToCategory[0] = UNKNOWN for i in xrange(len(categoryList)): self.categoryToIndex[categoryList[i]] = i+1 self.indexToCategory[i+1] = categoryList[i] self.encoder = ScalarEncoder(w, minval=0, maxval=self.ncategories - 1, radius=1, periodic=False, forced=forced) self.width = w * self.ncategories assert self.encoder.getWidth() == self.width self.description = [(name, 0)] self.name = name # These are used to support the topDownCompute method self._topDownMappingM = None # This gets filled in by getBucketValues self._bucketValues = None ############################################################################ def getDecoderOutputFieldTypes(self): """ [Encoder class virtual method override] """ # TODO: change back to string meta-type after the decoding logic is fixed # to output strings instead of internal index values. 
#return (FieldMetaType.string,) return (FieldMetaType.integer,) ############################################################################ def getWidth(self): return self.width ############################################################################ def getDescription(self): return self.description ############################################################################ def getScalars(self, input): """ See method description in base.py """ if input == SENTINEL_VALUE_FOR_MISSING_DATA: return numpy.array([None]) else: return numpy.array([self.categoryToIndex.get(input, 0)]) # to_note: returns the scalar value of the input, as stored in the categoryToIndex # Will return in the format of a numpy array (e.g. [1] or [2]), return [0] if the # input does not match with any of the key in categoryToIndex dictionary ############################################################################ def getBucketIndices(self, input): """ See method description in base.py """ # Get the bucket index from the underlying scalar encoder if input == SENTINEL_VALUE_FOR_MISSING_DATA: return [None] else: return self.encoder.getBucketIndices(self.categoryToIndex.get(input, 0)) # to_note: get the first ON bit from the ScalarEncoder for a given input. 
# Unknown value will have the first ON bit at position 1, then other values at k*w # Value NONE will have all 0s # problem_with_this_approach: this approach might be fast, but treating # category encoding as rigid scalar encoding might make it hard for learning ############################################################################ def encodeIntoArray(self, input, output): # if not found, we encode category 0 if input == SENTINEL_VALUE_FOR_MISSING_DATA: output[0:] = 0 val = "<missing>" else: val = self.categoryToIndex.get(input, 0) self.encoder.encodeIntoArray(val, output) if self.verbosity >= 2: print "input:", input, "va:", val, "output:", output print "decoded:", self.decodedToStr(self.decode(output)) ############################################################################ def decode(self, encoded, parentFieldName=''): """ See the function description in base.py """ # Get the scalar values from the underlying scalar encoder (fieldsDict, fieldNames) = self.encoder.decode(encoded) if len(fieldsDict) == 0: return (fieldsDict, fieldNames) # Expect only 1 field assert(len(fieldsDict) == 1) # Get the list of categories the scalar values correspond to and # generate the description from the category name(s). 
(inRanges, inDesc) = fieldsDict.values()[0] # to_note: dict.values() returns values in [list] form, that's why we need [0] outRanges = [] desc = "" for (minV, maxV) in inRanges: minV = int(round(minV)) maxV = int(round(maxV)) outRanges.append((minV, maxV)) while minV <= maxV: if len(desc) > 0: desc += ", " desc += self.indexToCategory[minV] minV += 1 """ ## Test with noisy encoding (very likely if such encoding comes from output of the predicting process) catfish = numpy.zeros(20, 'int') # catfish[5:10] = 1 # catfish[15:20] = 1 catfish[5:10] = 1 catfish[8] = 1 catfish[11] = 1 catfish[14] = 1 catfish[17] = 1 print "Cat fish =", catfish; print ## Note: this kind of encoding is highly unstable, for a little noisy output like this [0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 0 0 1 0 0], ## it should safely generate 'cat'. However, it generates 'cat', 'dog', 'fish'. To improve this encoding/decoding scheme, ## we might need to replace the filling in process, even might need to think about other way to process information. 
## problem_with_this_approach """ # Return result if parentFieldName != '': fieldName = "%s.%s" % (parentFieldName, self.name) else: fieldName = self.name return ({fieldName: (outRanges, desc)}, [fieldName]) ############################################################################ def closenessScores(self, expValues, actValues, fractional=True,): """ See the function description in base.py kwargs will have the keyword "fractional", which is ignored by this encoder """ expValue = expValues[0] actValue = actValues[0] if expValue == actValue: closeness = 1.0 else: closeness = 0.0 if not fractional: closeness = 1.0 - closeness return numpy.array([closeness]) ############################################################################ def getBucketValues(self): """ See the function description in base.py """ if self._bucketValues is None: numBuckets = len(self.encoder.getBucketValues()) self._bucketValues = [] for bucketIndex in range(numBuckets): self._bucketValues.append(self.getBucketInfo([bucketIndex])[0].value) # to_note: list of category corresponding to bucket indices # each bucket is a number that is spaced (radius/w) each other return self._bucketValues ############################################################################ def getBucketInfo(self, buckets): """ See the function description in base.py """ # For the category encoder, the bucket index is the category index bucketInfo = self.encoder.getBucketInfo(buckets)[0] categoryIndex = int(round(bucketInfo.value)) category = self.indexToCategory[categoryIndex] # to_note: map the bucket index to category return [EncoderResult(value=category, scalar=categoryIndex, encoding=bucketInfo.encoding)] ############################################################################ def topDownCompute(self, encoded): """ See the function description in base.py """ encoderResult = self.encoder.topDownCompute(encoded)[0] # to_note: return EncoderResult, which includes the value (depend on ScalarEncoder) value = 
encoderResult.value categoryIndex = int(round(value)) category = self.indexToCategory[categoryIndex] return EncoderResult(value=category, scalar=categoryIndex, encoding=encoderResult.encoding) @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.verbosity = proto.verbosity encoder.encoder = ScalarEncoder.read(proto.encoder) encoder.width = proto.width encoder.description = [(proto.name, 0)] encoder.name = proto.name encoder.indexToCategory = {x.index: x.category for x in proto.indexToCategory} encoder.categoryToIndex = {category: index for index, category in encoder.indexToCategory.items() if category != UNKNOWN} encoder._topDownMappingM = None encoder._bucketValues = None return encoder def write(self, proto): proto.width = self.width proto.indexToCategory = [ {"index": index, "category": category} for index, category in self.indexToCategory.items() ] proto.name = self.name proto.verbosity = self.verbosity self.encoder.write(proto.encoder)
class LogEncoder(Encoder): """ This class wraps the :class:`.ScalarEncoder`. A Log encoder represents a floating point value on a logarithmic scale. .. code-block:: python valueToEncode = log10(input) :param resolution: The minimum change in scaled value needed to produce a change in encoding. This should be specified in log space. For example, the scaled values 10 and 11 will be distinguishable in the output. In terms of the original input values, this means 10^1 (1) and 10^1.1 (1.25) will be distinguishable. :param radius: inputs separated by more than this distance in log space will have non-overlapping representations """ def __init__(self, w=5, minval=1e-07, maxval=10000, periodic=False, n=0, radius=0, resolution=0, name="log", verbosity=0, clipInput=True, forced=False): # Lower bound for log encoding near machine precision limit lowLimit = 1e-07 # Limit minval as log10(0) is undefined. if minval < lowLimit: minval = lowLimit # Check that minval is still lower than maxval if not minval < maxval: raise ValueError("Max val must be larger than min val or the lower limit " "for this encoder %.7f" % lowLimit) self.encoders = None self.verbosity = verbosity # Scale values for calculations within the class self.minScaledValue = math.log10(minval) self.maxScaledValue = math.log10(maxval) if not self.maxScaledValue > self.minScaledValue: raise ValueError("Max val must be larger, in log space, than min val.") self.clipInput = clipInput self.minval = minval self.maxval = maxval self.encoder = ScalarEncoder(w=w, minval=self.minScaledValue, maxval=self.maxScaledValue, periodic=False, n=n, radius=radius, resolution=resolution, verbosity=self.verbosity, clipInput=self.clipInput, forced=forced) self.width = self.encoder.getWidth() self.description = [(name, 0)] self.name = name # This list is created by getBucketValues() the first time it is called, # and re-created whenever our buckets would be re-arranged. 
self._bucketValues = None def getWidth(self): return self.width def getDescription(self): return self.description def getDecoderOutputFieldTypes(self): """ Encoder class virtual method override """ return (FieldMetaType.float, ) def _getScaledValue(self, inpt): """ Convert the input, which is in normal space, into log space """ if inpt == SENTINEL_VALUE_FOR_MISSING_DATA: return None else: val = inpt if val < self.minval: val = self.minval elif val > self.maxval: val = self.maxval scaledVal = math.log10(val) return scaledVal def getBucketIndices(self, inpt): """ See the function description in base.py """ # Get the scaled value scaledVal = self._getScaledValue(inpt) if scaledVal is None: return [None] else: return self.encoder.getBucketIndices(scaledVal) def encodeIntoArray(self, inpt, output): """ See the function description in base.py """ # Get the scaled value scaledVal = self._getScaledValue(inpt) if scaledVal is None: output[0:] = 0 else: self.encoder.encodeIntoArray(scaledVal, output) if self.verbosity >= 2: print "input:", inpt, "scaledVal:", scaledVal, "output:", output print "decoded:", self.decodedToStr(self.decode(output)) def decode(self, encoded, parentFieldName=''): """ See the function description in base.py """ # Get the scalar values from the underlying scalar encoder (fieldsDict, fieldNames) = self.encoder.decode(encoded) if len(fieldsDict) == 0: return (fieldsDict, fieldNames) # Expect only 1 field assert(len(fieldsDict) == 1) # Convert each range into normal space (inRanges, inDesc) = fieldsDict.values()[0] outRanges = [] for (minV, maxV) in inRanges: outRanges.append((math.pow(10, minV), math.pow(10, maxV))) # Generate a text description of the ranges desc = "" numRanges = len(outRanges) for i in xrange(numRanges): if outRanges[i][0] != outRanges[i][1]: desc += "%.2f-%.2f" % (outRanges[i][0], outRanges[i][1]) else: desc += "%.2f" % (outRanges[i][0]) if i < numRanges-1: desc += ", " # Return result if parentFieldName != '': fieldName = "%s.%s" % 
(parentFieldName, self.name) else: fieldName = self.name return ({fieldName: (outRanges, desc)}, [fieldName]) def getBucketValues(self): """ See the function description in base.py """ # Need to re-create? if self._bucketValues is None: scaledValues = self.encoder.getBucketValues() self._bucketValues = [] for scaledValue in scaledValues: value = math.pow(10, scaledValue) self._bucketValues.append(value) return self._bucketValues def getBucketInfo(self, buckets): """ See the function description in base.py """ scaledResult = self.encoder.getBucketInfo(buckets)[0] scaledValue = scaledResult.value value = math.pow(10, scaledValue) return [EncoderResult(value=value, scalar=value, encoding = scaledResult.encoding)] def topDownCompute(self, encoded): """ See the function description in base.py """ scaledResult = self.encoder.topDownCompute(encoded)[0] scaledValue = scaledResult.value value = math.pow(10, scaledValue) return EncoderResult(value=value, scalar=value, encoding = scaledResult.encoding) def closenessScores(self, expValues, actValues, fractional=True): """ See the function description in base.py """ # Compute the percent error in log space if expValues[0] > 0: expValue = math.log10(expValues[0]) else: expValue = self.minScaledValue if actValues [0] > 0: actValue = math.log10(actValues[0]) else: actValue = self.minScaledValue if fractional: err = abs(expValue - actValue) pctErr = err / (self.maxScaledValue - self.minScaledValue) pctErr = min(1.0, pctErr) closeness = 1.0 - pctErr else: err = abs(expValue - actValue) closeness = err #print "log::", "expValue:", expValues[0], "actValue:", actValues[0], \ # "closeness", closeness #import pdb; pdb.set_trace() return numpy.array([closeness]) @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.verbosity = proto.verbosity encoder.minScaledValue = proto.minScaledValue encoder.maxScaledValue = proto.maxScaledValue encoder.clipInput = proto.clipInput encoder.minval = proto.minval encoder.maxval = 
proto.maxval encoder.encoder = ScalarEncoder.read(proto.encoder) encoder.name = proto.name encoder.width = encoder.encoder.getWidth() encoder.description = [(encoder.name, 0)] encoder._bucketValues = None return encoder def write(self, proto): proto.verbosity = self.verbosity proto.minScaledValue = self.minScaledValue proto.maxScaledValue = self.maxScaledValue proto.clipInput = self.clipInput proto.minval = self.minval proto.maxval = self.maxval self.encoder.write(proto.encoder) proto.name = self.name
def main():
  """Run the MCN / TemporalMemory place-recognition experiment and plot
  precision-recall curves for each descriptor variant."""
  DIR = "./sim_data"

  # Odom Encoder
  xSDR = ScalarEncoder(w=21,minval=0,maxval=20,n=256)
  ySDR = ScalarEncoder(w=21,minval=0,maxval=20,n=256)
  xyWidth = xSDR.getWidth() + ySDR.getWidth()

  # Visual input: one binary row vector per image
  D = np.loadtxt(DIR + '/seq_multi_loop_noise05_al5.txt', dtype='i', delimiter=',')
  numberImages = D[:,0].size
  nColumns = D[0,:].size
  #time.sleep(10)

  # Odom input: ground-truth x/y positions per image
  odom = np.loadtxt(DIR + '/seq_multi_loop_noise05_al5_gt.txt', dtype='f', delimiter=',')
  x = odom[:,0]
  y = odom[:,1]

  # Encoder Odom input
  # NOTE(review): only the first sample is encoded here (range(1));
  # presumably this should iterate over all numberImages — confirm.
  odomSDR = np.zeros((numberImages,xyWidth), dtype=int)
  for i in range(1):
    _xSDR = np.zeros(xSDR.getWidth(), dtype=int)
    xSDR.encodeIntoArray(x[i], _xSDR)
    _ySDR = np.zeros(ySDR.getWidth(), dtype=int)
    ySDR.encodeIntoArray(y[i], _ySDR)
    odomSDR[i,:] = np.concatenate([_xSDR, _ySDR])

  # Apical/basal temporal memory fed with odometry as basal (distal) input
  tm0 = TM(
    columnCount=nColumns,
    cellsPerColumn=4,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    minThreshold=15,
    basalInputSize= 512,
    reducedBasalThreshold=1000,
    activationThreshold=1000,
    apicalInputSize=0,
    maxSynapsesPerSegment=-1,
    sampleSize=1,
    seed = 42
    )

  tm = TemporalMemory(
    # Must be the same dimensions as the SP
    columnDimensions=(2048,),
    # How many cells in each mini-column.
    cellsPerColumn=4,
    # A segment is active if it has >= activationThreshold connected synapses
    # that are active due to infActiveState
    activationThreshold=13,
    initialPermanence=0.21,
    connectedPermanence=0.5,
    # Minimum number of active synapses for a segment to be considered during
    # search for the best-matching segments.
    minThreshold=1,
    # The max number of synapses added to a segment during learning
    maxNewSynapseCount=3,
    #permanenceIncrement=0.01,
    #permanenceDecrement=0.01,
    predictedSegmentDecrement=0.0005,
    maxSegmentsPerCell=3,
    maxSynapsesPerSegment=3,
    seed=42
    )
  #time.sleep(10)

  # Simple HTM parameters
  params = Params()
  params.maxPredDepth = 0
  params.probAdditionalCon = 0.05 # probability for random connection
  params.nCellPerCol = 32 # number of cells per minicolumn
  params.nInConPerCol = int(round(np.count_nonzero(D) / D.shape[0]))
  #print params.nInConPerCol
  params.minColumnActivity = int(round(0.25*params.nInConPerCol))
  params.nColsPerPattern = 10 # minimum number of active minicolumns k_min
  params.kActiveColumn = 100 # maximum number of active minicolumns k_max
  params.kMin = 1

  # run HTM
  t = time.time()
  print ('Simple HTM')
  htm = MCN('htm',params)

  outputSDR = []
  max_index = []

  for i in range (min(numberImages,D.shape[0])):
    loop = 0
    #print('\n-------- ITERATION %d ---------' %i)
    # skip empty vectors
    if np.count_nonzero(D[i,:]) == 0:
      print('empty vector, skip\n')
      continue
    loop += 1
    #print D[i,:]
    htm.compute(D[i,:])
    max_index.append(max(htm.winnerCells))
    outputSDR.append(htm.winnerCells)

  elapsed = time.time() - t
  print("Elapsed time: %f seconds\n" %elapsed)

  # create output SDR matrix from HTM winner cell output
  M = np.zeros((len(outputSDR),max(max_index)+1), dtype=int)
  for i in range(len(outputSDR)):
    for j in range(len(outputSDR[i])):
      winner = outputSDR[i][j]
      M[i][winner] = 1

  # Temporal Pooler descriptors
  print 'Temporal Pooler descriptors'
  D1_tm=[]
  id_max1=[]
  t = time.time()
  for i in range(min(numberImages,D.shape[0])):
    D1_sp = np.nonzero(D[i,:])[0]
    tm.compute(D1_sp, learn=True)
    activeCells = tm.getWinnerCells()
    D1_tm.append(activeCells)
    id_max1.append(max(activeCells))
  elapsed = time.time() - t
  print( "Elapsed time: %f seconds\n" %elapsed)

  # create output SDR matrix from HTM winner cell output
  T = np.zeros((len(D1_tm),max(id_max1)+1), dtype=int)
  for i in range(len(D1_tm)):
    for j in range(len(D1_tm[i])):
      winner = D1_tm[i][j]
      T[i][winner] = 1

  # Temporal Pooler - Distal connections
  print 'Temporal Pooler - Distal connections'
  D2_tm=[]
  id_max2=[]
  t = time.time()
  for i in range(min(numberImages,D.shape[0])):
    D2_sp = np.nonzero(D[i,:])[0]
    # Odometry SDR drives the basal (distal) input of tm0.
    basalInputs = np.nonzero(odomSDR[i,:])[0]
    tm0.compute(sorted(D2_sp), sorted(basalInputs), apicalInput=(), basalGrowthCandidates=None, apicalGrowthCandidates=None, learn=True)
    activeCells2 = tm0.getWinnerCells()
    D2_tm.append(activeCells2)
    id_max2.append(max(activeCells2))
  elapsed = time.time() - t
  print( "Elapsed time: %f seconds\n" %elapsed)

  # create output SDR matrix from HTM winner cell output
  T2 = np.zeros((len(D2_tm),max(id_max2)+1), dtype=int)
  for i in range(len(D2_tm)):
    for j in range(len(D2_tm[i])):
      winner = D2_tm[i][j]
      T2[i][winner] = 1

  # Create ground truth and show precision-recall curves
  # Two images match iff their noise-free ground-truth rows are identical
  # (upper triangle only).
  GT_data = np.loadtxt(DIR + '/seq_multi_loop_noNoise_gt.txt', dtype='i', delimiter=',',skiprows=1)
  GT = np.zeros((numberImages,numberImages), dtype=int)
  for i in range(GT.shape[0]):
    for j in range(i,GT.shape[1]):
      GT[i,j] = (np.any(GT_data[i,:] != GT_data[j,:])==False)

  # Results
  print ('Results')
  fig, ax = plt.subplots()

  S0 = evaluateSimilarity(D)
  P, R = createPR(S0,GT)
  ax.plot(R, P, label='InputSDR: (avgP=%f)' %np.trapz(P,R))

  S1 = evaluateSimilarity(M)
  P, R = createPR(S1,GT)
  ax.plot(R, P, label='MCN (avgP=%f)' %np.trapz(P,R))

  S2 = evaluateSimilarity(T)
  P, R = createPR(S2,GT)
  ax.plot(R, P, label='HTM (avgP=%f)' %np.trapz(P,R))

  S3 = evaluateSimilarity(T2)
  P, R = createPR(S3,GT)
  ax.plot(R, P, label='HTM Distal (avgP=%f)' %np.trapz(P,R))

  ax.legend()
  ax.grid(True)
  plt.xlabel("Recall")
  plt.ylabel("Precision")
  plt.show()

# NOTE(review): the triple-quote below appears to open a block that disables
# the code following it; its closing quote is beyond this region — confirm.
'''
# NOTE(review): this function appears to lie inside the triple-quoted region
# opened above (i.e., it is disabled code); verify before relying on it.
def buildSDR(arrayOfShapeTypes,arrayOfDistances,arrayOfAreas,numCentroids):
  """Build one concatenated SDR for an image from per-centroid shape type,
  pairwise centroid distances, and centroid areas (1 to 3 centroids)."""
  #Scalar Encoder for the area
  areaEncoder = ScalarEncoder(5, 1e3, 3.8e3, periodic=False, n=80, radius=0,
                              resolution=0, name=None, verbosity=0,
                              clipInput=False, forced=True)
  #Scalar encoder for the distances between centroids
  distanceEncoder = ScalarEncoder(5, 30, 240, periodic=False, n=80, radius=0,
                                  resolution=0, name=None, verbosity=0,
                                  clipInput=False, forced=True)

  # Encode the pairwise distances; 3 centroids have 3 distances, 2 have 1.
  if numCentroids ==3:
    distanceBits0 = np.zeros(distanceEncoder.getWidth())
    distanceEncoder.encodeIntoArray(arrayOfDistances[0],distanceBits0)
    distanceBits1 = np.zeros(distanceEncoder.getWidth())
    distanceEncoder.encodeIntoArray(arrayOfDistances[1],distanceBits1)
    distanceBits2 = np.zeros(distanceEncoder.getWidth())
    distanceEncoder.encodeIntoArray(arrayOfDistances[2],distanceBits2)
  if numCentroids ==2:
    distanceBits0 = np.zeros(distanceEncoder.getWidth())
    distanceEncoder.encodeIntoArray(arrayOfDistances[0],distanceBits0)

  # Build the Triangle's base SDR. One hot encoding used for all SDRs.
  TriangleSDR = np.zeros(10)
  # Part A: Number of Sides [0 - 4]
  TriangleSDR[2] = 1
  # Part B: Number of Neighbors [5 - 9]
  TriangleSDR[7] = 1

  # Build the Circle's base SDR
  CircleSDR = np.zeros(10)
  # Part A: Number of Sides [0 - 4]
  CircleSDR[0] = 1
  # Part B: Number of Neighbors [5 - 9]
  CircleSDR[7] = 1

  # Build the Square's base SDR
  SquareSDR = np.zeros(10)
  # Part A: Number of Sides [0 - 4]
  SquareSDR[3] = 1
  # Part B: Number of Neighbors [5 - 9]
  SquareSDR[7] = 1

  arrayOfSDRs = np.array([])
  for i in range(numCentroids):
    # Encode the area
    areaBits = np.zeros(areaEncoder.getWidth())
    areaEncoder.encodeIntoArray(arrayOfAreas[i],areaBits)

    # Figure out the shape type via the CNN output then use an
    # if, elseif tree to decide which SDR to concatenate
    # NOTE(review): tempSDR is unset if arrayOfShapeTypes[i] is not 0/1/2.
    if arrayOfShapeTypes[i] == 0: #Its a Triangle
      tempSDR = np.concatenate(( TriangleSDR, areaBits))
    elif arrayOfShapeTypes[i] == 1: #Its a Circle
      tempSDR = np.concatenate(( CircleSDR, areaBits))
    elif arrayOfShapeTypes[i] == 2: #Its a Square
      tempSDR = np.concatenate(( SquareSDR, areaBits))

    # Append the distance bits relevant to centroid i (zero-padded when
    # fewer than 3 centroids exist).
    if numCentroids == 3:
      if i == 0: #Its the first item
        tempSDR = np.concatenate((tempSDR,distanceBits0,distanceBits2))
      elif i == 1: #Its the second
        tempSDR = np.concatenate((tempSDR,distanceBits0,distanceBits1))
      elif i == 2: #Its the third
        tempSDR = np.concatenate((tempSDR,distanceBits1,distanceBits2))
    elif numCentroids == 2:
      if i == 0: #Its the first item
        tempSDR = np.concatenate((tempSDR,distanceBits0,np.zeros(80)))
      elif i == 1: #Its the second
        tempSDR = np.concatenate((tempSDR,distanceBits0,np.zeros(80)))
    elif numCentroids == 1:
      tempSDR = np.concatenate((tempSDR,np.zeros(160)))

    arrayOfSDRs = np.append(arrayOfSDRs,tempSDR)

  # Zero-pad so the final SDR length is the same regardless of numCentroids.
  if numCentroids == 2:
    arrayOfSDRs = np.append(arrayOfSDRs,np.zeros(250))
  if numCentroids == 1:
    arrayOfSDRs = np.append(arrayOfSDRs,np.zeros(500))

  # Concatenate all three SDRs
  #print(arrayOfSDRs)
  imageSDR = arrayOfSDRs
  return imageSDR