def testPredictionDistributionOverlap(self):
    """
    Check the learned prediction distributions when the input SDRs overlap.

    The classifier is trained on these associations:
      SDR1    => bucketIdx 0 (30%)
              => bucketIdx 1 (30%)
              => bucketIdx 2 (40%)

      SDR2    => bucketIdx 1 (50%)
              => bucketIdx 3 (50%)

    SDR1 and SDR2 have 10% overlap (2 bits out of 20). Despite the shared
    bits, the inferred distributions are expected to match the training
    frequencies to one decimal place.
    """
    classifier = Classifier(0.0005)

    # Build the two inputs: the second starts as a copy of the first and is
    # then perturbed with 90% noise, which is intended to leave 2 shared bits.
    firstSdr = SDR(100)
    secondSdr = SDR(100)
    firstSdr.randomize(.20)
    secondSdr.setSDR(firstSdr)
    secondSdr.addNoise(.9)

    # Fixed seed so the sampled bucket labels are reproducible.
    random.seed(42)
    for _ in range(5000):
        # Label for the first SDR: 30% / 30% / 40% over buckets 0, 1, 2.
        draw = random.random()
        if draw < 0.3:
            bucket = 0
        elif draw < 0.6:
            bucket = 1
        else:
            bucket = 2
        classifier.learn(firstSdr, bucket)

        # Label for the second SDR: 50% / 50% over buckets 1 and 3.
        draw = random.random()
        bucket = 1 if draw < 0.5 else 3
        classifier.learn(secondSdr, bucket)

    firstResult = classifier.infer(firstSdr)
    for bucket, expected in ((0, 0.3), (1, 0.3), (2, 0.4)):
        self.assertAlmostEqual(firstResult[bucket], expected, places=1)

    secondResult = classifier.infer(secondSdr)
    for bucket, expected in ((1, 0.5), (3, 0.5)):
        self.assertAlmostEqual(secondResult[bucket], expected, places=1)
def testAddNoise(self):
    """Exercise SDR.addNoise: resulting overlap, seeding, and return value."""
    noisy = SDR((103, ))
    other = SDR((103, ))

    # Keep a pristine copy in `other`, then perturb half of `noisy`; the test
    # expects exactly 5 bits to remain in common.
    noisy.randomize(.1)
    other.setSDR(noisy)
    noisy.addNoise(.5)
    assert noisy.getOverlap(other) == 5

    # Identical starting points, noise added without an explicit seed:
    # the two results are expected to differ.
    noisy.randomize(.3, 42)
    other.randomize(.3, 42)
    noisy.addNoise(.5)
    other.addNoise(.5)
    assert noisy != other

    # Identical starting points and the same explicit noise seed:
    # the two results are expected to be equal.
    noisy.randomize(.3, 42)
    other.randomize(.3, 42)
    noisy.addNoise(.5, 42)
    other.addNoise(.5, 42)
    assert noisy == other

    # addNoise hands back the very object it was called on.
    returned = noisy.addNoise(.5)
    assert returned is noisy
def testSetSDR(self):
    """Exercise SDR.setSDR: value copy, dimension mismatch, and return value."""
    first = SDR((103, ))
    second = SDR((103, ))

    # Copy a single active bit from `first` into `second`.
    first.sparse = [66]
    second.setSDR(first)
    assert second.dense[66] == 1
    assert second.getSum() == 1

    # Flip one more bit through the dense view, then copy back the other way.
    second.dense[77] = 1
    # Re-assigning dense to itself: presumably required so the SDR registers
    # the in-place edit above -- confirm against the SDR documentation.
    second.dense = second.dense
    first.setSDR(second)
    assert set(first.sparse) == {66, 77}

    # Assigning from an SDR with different dimensions must raise RuntimeError.
    mismatched = SDR((2, 4, 5, 1, 1, 1, 1, 3))
    mismatched.randomize(.5)
    try:
        first.setSDR(mismatched)
    except RuntimeError:
        pass
    else:
        self.fail()

    # setSDR hands back the very object it was called on.
    returned = first.setSDR(second)
    assert returned is first
# In order to normalize this value we divide by the minimum number of active
# inputs (in either vector). This means we are considering the sparser vector as
# reference. Two identical binary vectors will have an input overlap of 1,
# whereas two completely different vectors (one is the logical NOT of the other)
# will yield an overlap of 0. In this section we will see how the input overlap
# of two binary vectors decreases as we add noise to one of them.
inputX1 = SDR(inputSDR.size).randomize(.50)
inputX2 = SDR(inputSDR.size)
outputX1 = SDR(outputSDR.size)
outputX2 = SDR(outputSDR.size)

# x collects the noise levels, y the matching input overlaps.
x, y = [], []
for noiseLevel in np.arange(0, 1.1, 0.1):
    # Start from a fresh copy of the reference vector at every noise level,
    # so corruption does not accumulate between iterations.
    inputX2.setSDR(inputX1)
    corruptSDR(inputX2, noiseLevel)
    x.append(noiseLevel)
    y.append(percentOverlap(inputX1, inputX2))

# Emit the figure caption in one call; the text is unchanged line for line.
print("\n".join((
    "",
    "---------------------------------",
    "Figure 2 shows the input overlap between 2 identical binary vectors in",
    "function of the noise applied to one of them.",
    "0 noise level means that the vector remains the same, whereas",
    "1 means that the vector is the logical negation of the original vector. ",
    "The relationship between overlap and noise level is practically linear ",
    "and monotonically decreasing.",
    "---------------------------------",
)))
def encode(self, inp, output=None):
    """
    Encode a datetime by running every enabled sub-encoder and combining
    their results into a single SDR.

    Argument inp: (datetime) representing the time being encoded. ``None`` or
        a float NaN is treated as missing: ``output.zero()`` is called and the
        output is returned immediately.
    Argument output: (SDR, optional) destination for the result. Its
        dimensions are checked against ``self.dimensions``. When omitted, a
        new ``SDR(self.dimensions)`` is created.

    Returns the output SDR.

    Raises ValueError if inp is neither missing (None / NaN) nor a
    ``datetime.datetime``.
    """
    if output is None:
        output = SDR(self.dimensions)
    else:
        assert (isinstance(output, SDR))
        assert (all(x == y for x, y in zip(output.dimensions, self.dimensions)))

    if inp is None or (isinstance(inp, float) and math.isnan(inp)):
        output.zero()
        return output
    elif not isinstance(inp, datetime.datetime):
        raise ValueError("Input is type %s, expected datetime. Value: %s" % (type(inp), str(inp)))

    # -------------------------------------------------------------------------
    # Encode each sub-field
    sdrs = []
    timetuple = inp.timetuple()
    # Hour of the day as a decimal number (seconds are ignored).
    timeOfDay = timetuple.tm_hour + float(timetuple.tm_min) / 60.0

    if self.seasonEncoder is not None:
        # Number the days starting at zero, instead of 1 like the datetime does.
        dayOfYear = timetuple.tm_yday - 1
        assert (dayOfYear >= 0)
        # dayOfYear -= self.seasonEncoder.parameters.radius / 2. # Round towards the middle of the season.
        sdrs.append(self.seasonEncoder.encode(dayOfYear))

    if self.dayOfWeekEncoder is not None:
        hrs_ = float(timeOfDay) / 24.0  # add hours as decimal value in extension to day
        dayOfWeek = timetuple.tm_wday + hrs_
        # Round towards noon, not midnight, this means similarity of
        # representations changes at midnights, not noon.
        dayOfWeek -= .5
        # handle underflow: on Mon before noon -> move to Sun
        if dayOfWeek < 0:
            dayOfWeek += 7
        assert (dayOfWeek >= 0 and dayOfWeek < 7)
        sdrs.append(self.dayOfWeekEncoder.encode(dayOfWeek))

    if self.weekendEncoder is not None:
        # saturday, sunday or friday evening
        if (timetuple.tm_wday == 6 or timetuple.tm_wday == 5 or
                (timetuple.tm_wday == 4 and timeOfDay > 18)):
            weekend = 1
        else:
            weekend = 0
        sdrs.append(self.weekendEncoder.encode(weekend))

    if self.customDaysEncoder is not None:
        # 1 when today's weekday index is one of the configured custom days.
        if timetuple.tm_wday in self.customDays:
            customDay = 1
        else:
            customDay = 0
        sdrs.append(self.customDaysEncoder.encode(customDay))

    if self.holidayEncoder is not None:
        # A "continuous" binary value. = 1 on the holiday itself and smooth ramp
        # 0->1 on the day before the holiday and 1->0 on the day after the holiday.
        # holidays is a list of holidays that occur on a fixed date every year
        val = 0
        for h in self.holidays:
            # hdate is midnight on the holiday. A 3-element entry carries its
            # own year; a 2-element entry uses the year of the input.
            if len(h) == 3:
                hdate = datetime.datetime(h[0], h[1], h[2], 0, 0, 0)
            else:
                hdate = datetime.datetime(timetuple.tm_year, h[0], h[1], 0, 0, 0)
            if inp > hdate:
                diff = inp - hdate
                if diff.days == 0:
                    # return 1 on the holiday itself
                    val = 1
                    break
                elif diff.days == 1:
                    # ramp smoothly from 1 -> 0 on the next day
                    # (subtracting, so the value falls instead of rising to 2)
                    val = 1.0 - (float(diff.seconds) / 86400)
                    break
            else:
                diff = hdate - inp
                if diff.days == 0:
                    # ramp smoothly from 0 -> 1 on the previous day
                    val = 1.0 - (float(diff.seconds) / 86400)
        sdrs.append(self.holidayEncoder.encode(val))

    if self.timeOfDayEncoder is not None:
        sdrs.append(self.timeOfDayEncoder.encode(timeOfDay))

    # A single sub-encoding is copied directly; several are concatenated.
    if len(sdrs) > 1:
        output.concatenate(sdrs)
    else:
        output.setSDR(sdrs[0])
    return output