def testJSONSerialization(self):
    """
    Check that Python can reach the C++ serialization functions.

    Serialization is tested more completely in the C++ unit tests; this
    only round-trips an encoder through a JSON file and compares the
    encodings produced by the original and the reloaded encoder.
    """
    p = ScalarEncoderParameters()
    p.size = 100
    p.activeBits = 10
    p.minimum = 0
    p.maximum = 20
    p.clipInput = True
    encoder1 = ScalarEncoder(p)

    filename = 'ScalarEncoder_testSerialization.json'
    try:
        encoder1.saveToFile(filename, "JSON")

        encoder2 = ScalarEncoder()
        encoder2.loadFromFile(filename, "JSON")

        # Well outside [minimum, maximum]; clipInput is enabled above.
        value_to_encode = 69003
        SDR_original = encoder1.encode(value_to_encode)
        SDR_loaded = encoder2.encode(value_to_encode)
        assert SDR_original == SDR_loaded
    finally:
        # Remove the scratch file even when the round trip fails, without
        # masking the original error if the file was never written.
        if os.path.exists(filename):
            os.remove(filename)
def testRadius(self):
    """
    Encode pairs of values with radius = 10 and check how many active bits
    the two encodings share.
    """
    params = ScalarEncoderParameters()
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 100
    params.radius = 10
    encoder = ScalarEncoder(params)
    first = SDR(encoder.parameters.size)
    second = SDR(encoder.parameters.size)

    # (value A, value B, expected number of shared active bits)
    cases = (
        (77, 77, 10),
        (0, 1, 9),
        (60, 69, 1),
        (45, 55, 0),
    )
    for valueA, valueB, expected in cases:
        encoder.encode(valueA, first)
        encoder.encode(valueB, second)
        assert first.getOverlap(second) == expected
def testStatistics(self):
    """
    Sweep the encoder across (and slightly beyond) its input range and
    check the sparsity, activation frequency and overlap metrics.
    """
    params = ScalarEncoderParameters()
    params.size = 100
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 20
    params.clipInput = True
    enc = ScalarEncoder(params)
    del params
    out = SDR(enc.parameters.size)
    mtr = Metrics(out, 9999)

    # Note carried over from the original test: bits near the endpoints of
    # the range behave a little oddly. The bits at the very end are used
    # less often than the ones in the middle, unless clipInput is enabled.
    # With clipInput enabled, the bits one radius from the end are used
    # twice as often as they should be, because they respond to inputs
    # outside of the valid range as well as to inputs inside of it.
    halfRadius = enc.parameters.radius / 2
    sweep = np.linspace(enc.parameters.minimum - halfRadius,
                        enc.parameters.maximum + halfRadius,
                        100 + 10)
    for value in sweep:
        enc.encode(value, out)

    print(str(mtr))

    targetSparsity = .10
    assert mtr.sparsity.min() > .95 * targetSparsity
    assert mtr.sparsity.max() < 1.05 * targetSparsity
    assert mtr.activationFrequency.min() > .50 * targetSparsity
    assert mtr.activationFrequency.max() < 1.75 * targetSparsity
    assert mtr.overlap.min() > .85
def testBadParameters(self):
    """
    Check that invalid parameter combinations are rejected.

    Starts from a valid parameter set, breaks one aspect at a time, expects
    ScalarEncoder construction to raise RuntimeError, and then restores the
    valid value so that each case is checked in isolation.
    """
    # Start with sane parameters.
    p = ScalarEncoderParameters()
    p.size = 10
    p.activeBits = 2
    p.minimum = 0
    p.maximum = 1
    ScalarEncoder(p)

    # Check a lot of bad parameters.
    p.activeBits = 12  # Can not activate more bits than are in the SDR.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)

    p.activeBits = 0  # Not enough active bits.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)

    p.activeBits = 1
    p.size = 0  # Not enough bits.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.activeBits = 2
    # Restore the size too; otherwise the next case would still run with
    # size == 0 and could raise for that reason alone.
    p.size = 10

    p.maximum = -1  # Maximum is less than the minimum.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.maximum = 1

    p.size = 0
    p.activeBits = 0
    p.sparsity = .1  # Specify sparsity without output size.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.size = 10
    p.activeBits = 2
    p.sparsity = 0

    p.sparsity = .2  # Sparsity & num activeBits specified.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.sparsity = 0

    p.clipInput = True  # Incompatible features...
    p.periodic = True
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.clipInput = False
    p.periodic = False

    p.radius = 1  # Size specified too many times.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.radius = 0

    p.resolution = 1  # Size specified too many times.
    with self.assertRaises(RuntimeError):
        ScalarEncoder(p)
    p.resolution = 0
def testNaNs(self):
    """
    Encoding NaN must leave the output SDR without any active bits.
    """
    params = ScalarEncoderParameters()
    params.size = 100
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 100
    encoder = ScalarEncoder(params)

    result = SDR(100)
    notANumber = float("nan")
    encoder.encode(notANumber, result)
    assert result.getSum() == 0
def testEncode(self):
    """
    Check the exact active bits at both ends of the input range, using both
    the in-place and the returning form of encode().
    """
    params = ScalarEncoderParameters()
    params.size = 10
    params.activeBits = 3
    params.minimum = 0
    params.maximum = 1
    encoder = ScalarEncoder(params)

    # In-place form: the result is written into a caller-provided SDR.
    lowest = SDR(10)
    encoder.encode(0, lowest)
    assert list(lowest.sparse) == [0, 1, 2]

    # Returning form: encode() hands back the SDR it produced.
    highest = encoder.encode(1)
    assert list(highest.sparse) == [7, 8, 9]
def testCategories(self):
    """
    With radius = 1, neighbouring integer values must not share any bits.
    """
    # Two categories: 0 and 1.
    params = ScalarEncoderParameters()
    params.minimum = 0
    params.maximum = 1
    params.activeBits = 3
    params.radius = 1
    encoder = ScalarEncoder(params)
    # Built from the encoder's dimensions; not used afterwards.
    sdr = SDR(encoder.dimensions)
    zero = encoder.encode(0)
    one = encoder.encode(1)
    assert zero.getOverlap(one) == 0

    # Three categories: 0, 1 and 2.
    params = ScalarEncoderParameters()
    params.minimum = 0
    params.maximum = 2
    params.activeBits = 3
    params.radius = 1
    encoder = ScalarEncoder(params)
    sdr = SDR(encoder.dimensions)
    zero = encoder.encode(0)
    one = encoder.encode(1)
    two = encoder.encode(2)
    assert zero.getOverlap(one) == 0
    assert one.getOverlap(two) == 0
    assert two.getSum() == 3
def testClipInput(self):
    """
    With clipInput enabled, out-of-range inputs must encode exactly like the
    nearest end of the range.
    """
    params = ScalarEncoderParameters()
    params.size = 345
    params.sparsity = .05
    params.minimum = 0
    params.maximum = 1
    params.clipInput = 1
    encoder = ScalarEncoder(params)
    inRange = SDR(345)
    outOfRange = SDR(345)

    # (boundary value, value beyond that boundary)
    for boundary, beyond in ((0, -1), (1, 10)):
        encoder.encode(boundary, inRange)
        encoder.encode(beyond, outOfRange)
        assert inRange == outOfRange
def testBadEncode(self):
    """
    encode() must raise RuntimeError for a wrongly sized output SDR and for
    out-of-range inputs when clipping is disabled.
    """
    params = ScalarEncoderParameters()
    params.size = 10
    params.activeBits = 2
    params.minimum = 0
    params.maximum = 1
    encoder = ScalarEncoder(params)
    rightSize = SDR(10)
    wrongSize = SDR(5)

    # A correctly sized SDR is accepted ...
    encoder.encode(.25, rightSize)
    # ... but one with the wrong size is rejected.
    with self.assertRaises(RuntimeError):
        encoder.encode(.25, wrongSize)

    # Inputs just outside of the valid range, with clipping disabled.
    for outside in (-.0001, 1.0001):
        with self.assertRaises(RuntimeError):
            encoder.encode(outside, rightSize)
def testPickle(self):
    """
    Round-trip an encoder through pickle and check that the copy has the
    same size and produces the same encoding.
    """
    import pickle

    params = ScalarEncoderParameters()
    params.size = 100
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 20
    params.clipInput = True
    original = ScalarEncoder(params)

    restored = pickle.loads(pickle.dumps(original))
    # The parameters objects are not compared here; only the encoder size.
    assert original.size == restored.size

    originalOut = SDR(original.parameters.size)
    restoredOut = SDR(restored.parameters.size)
    original.encode(10, originalOut)
    restored.encode(10, restoredOut)
    assert originalOut == restoredOut
def testPeriodic(self):
    """
    Cycle a periodic encoder through its range ten times and check the
    sparsity, activation frequency and overlap metrics.
    """
    params = ScalarEncoderParameters()
    params.size = 100
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 20
    params.periodic = True
    enc = ScalarEncoder(params)
    out = SDR(enc.parameters.size)
    mtr = Metrics(out, 9999)

    stepsPerCycle = 201
    for step in range(stepsPerCycle * 10 + 1):
        # Walks 0.0, 0.1, ..., 20.0 and then wraps back around to 0.0.
        value = (step % stepsPerCycle) / 10.
        enc.encode(value, out)

    print(str(mtr))

    targetSparsity = .10
    assert mtr.sparsity.min() > .95 * targetSparsity
    assert mtr.sparsity.max() < 1.05 * targetSparsity
    assert mtr.activationFrequency.min() > .9 * targetSparsity
    assert mtr.activationFrequency.max() < 1.1 * targetSparsity
    assert mtr.overlap.min() > .85
def ScalarEncoderGenerator(mini, maxi, size, spars=-1):
    """
    Build a ScalarEncoder covering [mini, maxi] with `size` output bits.

    Argument spars: sparsity of the encoder. When left at -1 the sparsity is
    derived as 1 / (maxi - mini + 1), with the intent of not allowing
    overlapping bits (the original author marked this formula as
    "unsure if correct").
    """
    if spars == -1:
        sparsity = 1 / ((maxi - mini) + 1)
    else:
        sparsity = spars

    config = ScalarEncoderParameters()
    config.minimum = mini
    config.maximum = maxi
    config.size = size
    config.sparsity = sparsity
    return ScalarEncoder(config)
def testConstructor(self):
    """
    Build an encoder from size + activeBits and check the dimensions, the
    default flags and the derived sparsity, radius and resolution.
    """
    params = ScalarEncoderParameters()
    params.size = 1000
    params.activeBits = 20
    params.minimum = 0
    params.maximum = 345
    encoder = ScalarEncoder(params)

    assert encoder.dimensions == [1000]
    assert encoder.size == 1000

    derived = encoder.parameters
    # Neither optional behaviour was requested.
    assert not derived.clipInput
    assert not derived.periodic
    # Derived values, checked against loose tolerances.
    assert abs(derived.sparsity - 20./1000) < .01
    assert abs(derived.radius - 7) < 1
    assert abs(derived.resolution - .35) < .1
def testResolution(self):
    """
    With resolution = 0.5, check which inputs encode identically and how the
    overlap falls off as two inputs move apart.
    """
    params = ScalarEncoderParameters()
    params.activeBits = 10
    params.minimum = 0
    params.maximum = 100
    params.resolution = .5
    encoder = ScalarEncoder(params)
    first = SDR(encoder.parameters.size)
    second = SDR(encoder.parameters.size)

    # Inputs closer together than the resolution encode identically.
    encoder.encode(.0, first)
    encoder.encode(.1, second)
    assert first == second

    # (value A, value B, expected number of shared active bits)
    cases = (
        (.0, .6, 9),
        (70, 72.5, 5),
        (70, 75, 0),
        (60, 80, 0),
    )
    for valueA, valueB, expected in cases:
        encoder.encode(valueA, first)
        encoder.encode(valueB, second)
        assert first.getOverlap(second) == expected
def __init__(self, season=0, dayOfWeek=0, weekend=0, holiday=0, timeOfDay=0, customDays=0, holidays=((12, 25), )): """ Each parameter describes one attribute to encode. By default, the attribute is not encoded. Argument season: (int | tuple) Season of the year, where units = day. - (int) width of attribute; default radius = 91.5 days (1 season) - (tuple) season[0] = width; season[1] = radius Argument dayOfWeek: (int | tuple) Day of week, where monday = 0, units = 1 day. - (int) width of attribute; default radius = 1 day - (tuple) dayOfWeek[0] = width; dayOfWeek[1] = radius Argument weekend: (int) Is a weekend or not. A block of bits either 0s or 1s. Note: the implementation treats "weekend" as starting Fri 6pm, till Sun midnight. - (int) width of attribute - TODO remove and replace by customDays=(width, ["Saturday", "Sunday"]) ? Argument holiday: (int) Is a holiday or not, boolean: 0, 1 - (int) width of attribute Argument timeOfday: (int | tuple) Time of day, where midnight = 0, units = hour. - (int) width of attribute: default radius = 4 hours - (tuple) timeOfDay[0] = width; timeOfDay[1] = radius Argument customDays: (tuple) A way to custom encode specific days of the week. - [0] (int) Width of attribute - [1] (str | list) Either a string representing a day of the week like "Monday" or "mon", or a list of these strings. Argument holidays: (list) a list of tuples for holidays. - Each holiday is either (month, day) or (year, month, day). The former will use the same month day every year eg: (12, 25) for Christmas. The latter will be a one off holiday eg: (2018, 4, 1) for Easter Sunday 2018 - By default the only holiday is December 25. 
""" self.size = 0 self.seasonEncoder = None if season != 0: p = ScalarEncoderParameters() # Ignore leapyear differences -- assume 366 days in a year # Radius = 91.5 days = length of season # Value is number of days since beginning of year (0 - 355) p.minimum = 0 p.maximum = 366 p.periodic = True try: activeBits, radius = season except TypeError: p.activeBits = season p.radius = 91.5 else: p.activeBits = season[0] p.radius = season[1] self.seasonEncoder = ScalarEncoder(p) self.size += self.seasonEncoder.size self.dayOfWeekEncoder = None if dayOfWeek != 0: p = ScalarEncoderParameters() # Value is day of week (floating point) # Radius is 1 day p.minimum = 0 p.maximum = 7 p.periodic = True try: activeBits, radius = dayOfWeek except TypeError: p.activeBits = dayOfWeek p.radius = 1 else: p.activeBits = dayOfWeek[0] p.radius = dayOfWeek[1] self.dayOfWeekEncoder = ScalarEncoder(p) self.size += self.dayOfWeekEncoder.size self.weekendEncoder = None if weekend != 0: p = ScalarEncoderParameters() # Binary value. p.minimum = 0 p.maximum = 1 p.category = True p.activeBits = weekend self.weekendEncoder = ScalarEncoder(p) self.size += self.weekendEncoder.size # Set up custom days encoder, first argument in tuple is width # second is either a single day of the week or a list of the days # you want encoded as ones. 
self.customDaysEncoder = None if customDays != 0: daysToParse = [] assert len( customDays) == 2, "Please provide a w and the desired days" if isinstance(customDays[1], list): daysToParse = customDays[1] elif isinstance(customDays[1], str): daysToParse = [customDays[1]] else: raise ValueError( "You must provide either a list of days or a single day") # Parse days self.customDays = [] for day in daysToParse: if (day.lower() in ["mon", "monday"]): self.customDays += [0] elif day.lower() in ["tue", "tuesday"]: self.customDays += [1] elif day.lower() in ["wed", "wednesday"]: self.customDays += [2] elif day.lower() in ["thu", "thursday"]: self.customDays += [3] elif day.lower() in ["fri", "friday"]: self.customDays += [4] elif day.lower() in ["sat", "saturday"]: self.customDays += [5] elif day.lower() in ["sun", "sunday"]: self.customDays += [6] else: raise ValueError( "Unable to understand %s as a day of week" % str(day)) p = ScalarEncoderParameters() p.activeBits = customDays[0] p.minimum = 0 p.maximum = 1 p.category = True self.customDaysEncoder = ScalarEncoder(p) self.size += self.customDaysEncoder.size self.holidayEncoder = None if holiday != 0: p = ScalarEncoderParameters() # A "continuous" binary value. = 1 on the holiday itself and smooth ramp # 0->1 on the day before the holiday and 1->0 on the day after the # holiday. p.minimum = 0 p.maximum = 1 p.radius = 1 p.activeBits = holiday self.holidayEncoder = ScalarEncoder(p) self.size += self.holidayEncoder.size for h in holidays: if not (hasattr(h, "__getitem__") or len(h) not in [2, 3]): raise ValueError( "Holidays must be an iterable of length 2 or 3") self.holidays = holidays self.timeOfDayEncoder = None if timeOfDay != 0: p = ScalarEncoderParameters() p.minimum = 0 p.maximum = 24 p.periodic = True # Value is time of day in hours # Radius = 4 hours, e.g. morning, afternoon, evening, early night, late # night, etc. 
try: activeBits, radius = timeOfDay except TypeError: p.activeBits = timeOfDay p.radius = 4 else: p.activeBits = timeOfDay[0] p.radius = timeOfDay[1] self.timeOfDayEncoder = ScalarEncoder(p) self.size += self.timeOfDayEncoder.size self.dimensions = (self.size, ) assert (self.size > 0)
class DateEncoder:
    """
    A date encoder encodes a time and date. The input to a date encoder is a
    datetime.datetime object. The output is the concatenation of several
    sub-encodings, each of which encodes a different aspect of the date.
    Which sub-encodings are present, and details of those sub-encodings, are
    specified in the DateEncoder constructor.
    """

    def __init__(self,
                 season=0,
                 dayOfWeek=0,
                 weekend=0,
                 holiday=0,
                 timeOfDay=0,
                 customDays=0,
                 holidays=((12, 25), )):
        """
        Each parameter describes one attribute to encode. By default, the
        attribute is not encoded.

        Argument season: (int | tuple) Season of the year, where
            units = day.
            - (int) width of attribute; default radius = 91.5 days
              (1 season)
            - (tuple) season[0] = width; season[1] = radius

        Argument dayOfWeek: (int | tuple) Day of week, where monday = 0,
            units = 1 day.
            - (int) width of attribute; default radius = 1 day
            - (tuple) dayOfWeek[0] = width; dayOfWeek[1] = radius

        Argument weekend: (int) Is a weekend or not. A block of bits either
            0s or 1s. Note: the implementation treats "weekend" as starting
            Fri 6pm, till Sun midnight.
            - (int) width of attribute
            - TODO remove and replace by
              customDays=(width, ["Saturday", "Sunday"]) ?

        Argument holiday: (int) Is a holiday or not, boolean: 0, 1
            - (int) width of attribute

        Argument timeOfDay: (int | tuple) Time of day, where midnight = 0,
            units = hour.
            - (int) width of attribute: default radius = 4 hours
            - (tuple) timeOfDay[0] = width; timeOfDay[1] = radius

        Argument customDays: (tuple) A way to custom encode specific days of
            the week.
            - [0] (int) Width of attribute
            - [1] (str | list) Either a string representing a day of the
              week like "Monday" or "mon", or a list of these strings.

        Argument holidays: (list) a list of tuples for holidays.
            - Each holiday is either (month, day) or (year, month, day).
              The former will use the same month day every year eg:
              (12, 25) for Christmas. The latter will be a one off holiday
              eg: (2018, 4, 1) for Easter Sunday 2018
            - By default the only holiday is December 25.

        Raises ValueError when customDays[1] is neither a list nor a string,
        when a day name is not recognised, or (only when `holiday` is
        enabled) when an entry of `holidays` is not an indexable of length
        2 or 3.
        """
        self.size = 0

        self.seasonEncoder = None
        if season != 0:
            p = ScalarEncoderParameters()
            # Ignore leap-year differences -- assume 366 days in a year.
            # Radius = 91.5 days = length of season.
            # Value is the number of days since the beginning of the year
            # (0 - 365).
            p.minimum = 0
            p.maximum = 366
            p.periodic = True
            try:
                width, radius = season
            except TypeError:
                # A bare int is the width; fall back to the default radius.
                width, radius = season, 91.5
            p.activeBits = width
            p.radius = radius
            self.seasonEncoder = ScalarEncoder(p)
            self.size += self.seasonEncoder.size

        self.dayOfWeekEncoder = None
        if dayOfWeek != 0:
            p = ScalarEncoderParameters()
            # Value is day of week (floating point).
            # Radius is 1 day.
            p.minimum = 0
            p.maximum = 7
            p.periodic = True
            try:
                width, radius = dayOfWeek
            except TypeError:
                width, radius = dayOfWeek, 1
            p.activeBits = width
            p.radius = radius
            self.dayOfWeekEncoder = ScalarEncoder(p)
            self.size += self.dayOfWeekEncoder.size

        self.weekendEncoder = None
        if weekend != 0:
            p = ScalarEncoderParameters()
            # Binary value.
            p.minimum = 0
            p.maximum = 1
            p.category = True
            p.activeBits = weekend
            self.weekendEncoder = ScalarEncoder(p)
            self.size += self.weekendEncoder.size

        # Set up custom days encoder: first item of the tuple is the width,
        # the second is either a single day of the week or a list of the
        # days that should be encoded as ones.
        self.customDaysEncoder = None
        if customDays != 0:
            assert len(
                customDays) == 2, "Please provide a w and the desired days"
            if isinstance(customDays[1], list):
                daysToParse = customDays[1]
            elif isinstance(customDays[1], str):
                daysToParse = [customDays[1]]
            else:
                raise ValueError(
                    "You must provide either a list of days or a single day")

            # Map every accepted (lower-cased) spelling to its weekday
            # number, where monday = 0.
            dayNumbers = {}
            for number, names in enumerate((("mon", "monday"),
                                            ("tue", "tuesday"),
                                            ("wed", "wednesday"),
                                            ("thu", "thursday"),
                                            ("fri", "friday"),
                                            ("sat", "saturday"),
                                            ("sun", "sunday"))):
                for name in names:
                    dayNumbers[name] = number

            # Parse days.
            self.customDays = []
            for day in daysToParse:
                key = day.lower()
                if key not in dayNumbers:
                    raise ValueError(
                        "Unable to understand %s as a day of week" % str(day))
                self.customDays.append(dayNumbers[key])

            p = ScalarEncoderParameters()
            p.activeBits = customDays[0]
            p.minimum = 0
            p.maximum = 1
            p.category = True
            self.customDaysEncoder = ScalarEncoder(p)
            self.size += self.customDaysEncoder.size

        self.holidayEncoder = None
        if holiday != 0:
            # Validate before building the encoder. Every entry must be
            # indexable and hold (month, day) or (year, month, day).
            for h in holidays:
                if not (hasattr(h, "__getitem__") and len(h) in (2, 3)):
                    raise ValueError(
                        "Holidays must be an iterable of length 2 or 3")
            p = ScalarEncoderParameters()
            # A "continuous" binary value. = 1 on the holiday itself and
            # smooth ramp 0->1 on the day before the holiday and 1->0 on the
            # day after the holiday.
            p.minimum = 0
            p.maximum = 1
            p.radius = 1
            p.activeBits = holiday
            self.holidayEncoder = ScalarEncoder(p)
            self.size += self.holidayEncoder.size
        # Stored unconditionally; encode() only reads it when the holiday
        # encoder exists.
        self.holidays = holidays

        self.timeOfDayEncoder = None
        if timeOfDay != 0:
            p = ScalarEncoderParameters()
            p.minimum = 0
            p.maximum = 24
            p.periodic = True
            # Value is time of day in hours.
            # Radius = 4 hours, e.g. morning, afternoon, evening, early
            # night, late night, etc.
            try:
                width, radius = timeOfDay
            except TypeError:
                width, radius = timeOfDay, 4
            p.activeBits = width
            p.radius = radius
            self.timeOfDayEncoder = ScalarEncoder(p)
            self.size += self.timeOfDayEncoder.size

        self.dimensions = (self.size, )
        # At least one attribute must have been enabled.
        assert (self.size > 0)

    def reset(self):
        """ Does nothing, DateEncoder holds no state. """
        pass

    def encode(self, inp, output=None):
        """
        Encode a datetime into an SDR.

        Argument inp: (datetime) representing the time being encoded.
            None or a float NaN produces an all-zero output.
        Argument output: (SDR) optional SDR to write the result into; it
            must have the same dimensions as this encoder. A new SDR is
            created when omitted.

        Returns the SDR holding the encoding (`output` when it was given).

        Raises ValueError when `inp` is neither None, NaN nor a
        datetime.datetime.
        """
        if output is None:
            output = SDR(self.dimensions)
        else:
            assert (isinstance(output, SDR))
            # Compare as tuples so that a list-valued `output.dimensions`
            # still matches the tuple stored on this encoder.
            assert (tuple(output.dimensions) == tuple(self.dimensions))

        if inp is None or (isinstance(inp, float) and math.isnan(inp)):
            output.zero()
            return output
        if not isinstance(inp, datetime.datetime):
            raise ValueError("Input is type %s, expected datetime. Value: %s" %
                             (type(inp), str(inp)))

        # ---------------------------------------------------------------------
        # Encode each sub-field
        sdrs = []
        timetuple = inp.timetuple()
        timeOfDay = timetuple.tm_hour + float(timetuple.tm_min) / 60.0

        if self.seasonEncoder is not None:
            # Number the days starting at zero, instead of 1 like the
            # datetime does.
            dayOfYear = timetuple.tm_yday - 1
            sdrs.append(self.seasonEncoder.encode(dayOfYear))

        if self.dayOfWeekEncoder is not None:
            dayOfWeek = timetuple.tm_wday + timeOfDay / 24.0
            dayOfWeek -= .5  # Round towards noon, not midnight
            sdrs.append(self.dayOfWeekEncoder.encode(dayOfWeek))

        if self.weekendEncoder is not None:
            # Saturday, Sunday or Friday evening (after 18:00).
            isWeekend = (timetuple.tm_wday in (5, 6)
                         or (timetuple.tm_wday == 4 and timeOfDay > 18))
            sdrs.append(self.weekendEncoder.encode(1 if isWeekend else 0))

        if self.customDaysEncoder is not None:
            customDay = 1 if timetuple.tm_wday in self.customDays else 0
            sdrs.append(self.customDaysEncoder.encode(customDay))

        if self.holidayEncoder is not None:
            # A "continuous" binary value. = 1 on the holiday itself and
            # smooth ramp 0->1 on the day before the holiday and 1->0 on the
            # day after the holiday.
            val = 0
            for h in self.holidays:
                # hdate is midnight on the holiday. Two-item entries recur
                # every year and use the year of the input.
                if len(h) == 3:
                    hdate = datetime.datetime(h[0], h[1], h[2], 0, 0, 0)
                else:
                    hdate = datetime.datetime(timetuple.tm_year, h[0], h[1],
                                              0, 0, 0)
                if inp > hdate:
                    diff = inp - hdate
                    if diff.days == 0:
                        # Return 1 on the holiday itself.
                        val = 1
                        break
                    elif diff.days == 1:
                        # Ramp smoothly from 1 -> 0 on the next day.
                        val = 1.0 - (float(diff.seconds) / 86400)
                        break
                else:
                    diff = hdate - inp
                    if diff.days == 0:
                        # Ramp smoothly from 0 -> 1 on the previous day.
                        val = 1.0 - (float(diff.seconds) / 86400)
            sdrs.append(self.holidayEncoder.encode(val))

        if self.timeOfDayEncoder is not None:
            sdrs.append(self.timeOfDayEncoder.encode(timeOfDay))

        # A single sub-encoding is copied directly; several are joined.
        if len(sdrs) > 1:
            output.concatenate(sdrs)
        else:
            output.setSDR(sdrs[0])
        return output