Beispiel #1
0
  def testSDRCategoryEncoder(self):
      """Exercise SDRCategoryEncoder end to end.

      Covers encode/decode round trips, top-down compute, bucket lookups, the
      unknown and missing-value inputs, decoding after a single flipped bit,
      decoding the union of two encodings, a pickle round trip, encoding
      without a predefined category list, and rejection of a category list
      that contains duplicates.
      """
      # make sure we have > 16 categories so that we have to grow our sdrs
      categories = ["ES", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8",
                    "S9", "S10", "S11", "S12", "S13", "S14", "S15", "S16",
                    "S17", "S18", "S19", "GB", "US"]

      fieldWidth = 100
      bitsOn = 10

      s = SDRCategoryEncoder(n=fieldWidth, w=bitsOn, categoryList=categories,
                             name="foo", verbosity=0, forced=True)

      # internal check
      self.assertEqual(s.sdrs.shape, (32, fieldWidth))

      # ES
      es = s.encode("ES")
      self.assertEqual(es.sum(), bitsOn)
      self.assertEqual(es.shape, (fieldWidth,))

      x = s.decode(es)
      self.assertIsInstance(x[0], dict)
      self.assertIn("foo", x[0])
      self.assertEqual(x[0]["foo"][1], "ES")

      topDown = s.topDownCompute(es)
      self.assertEqual(topDown.value, "ES")
      self.assertEqual(topDown.scalar, 1)
      self.assertEqual(topDown.encoding.sum(), bitsOn)

      # ----------------------------------------------------------------------
      # Test topdown compute
      for v in categories:
        output = s.encode(v)
        topDown = s.topDownCompute(output)
        self.assertEqual(topDown.value, v)
        self.assertEqual(topDown.scalar, s.getScalars(v)[0])

        # The bucket-based lookup must agree with the top-down result.
        bucketIndices = s.getBucketIndices(v)
        topDown = s.getBucketInfo(bucketIndices)[0]
        self.assertEqual(topDown.value, v)
        self.assertEqual(topDown.scalar, s.getScalars(v)[0])
        self.assertTrue((topDown.encoding == output).all())
        self.assertEqual(topDown.value, s.getBucketValues()[bucketIndices[0]])

      # Unknown
      unknown = s.encode("ASDFLKJLK")
      self.assertEqual(unknown.sum(), bitsOn)
      self.assertEqual(unknown.shape, (fieldWidth,))
      x = s.decode(unknown)
      self.assertEqual(x[0]["foo"][1], "<UNKNOWN>")

      topDown = s.topDownCompute(unknown)
      self.assertEqual(topDown.value, "<UNKNOWN>")
      self.assertEqual(topDown.scalar, 0)

      # US
      us = s.encode("US")
      self.assertEqual(us.sum(), bitsOn)
      self.assertEqual(us.shape, (fieldWidth,))
      x = s.decode(us)
      self.assertEqual(x[0]["foo"][1], "US")

      topDown = s.topDownCompute(us)
      self.assertEqual(topDown.value, "US")
      self.assertEqual(topDown.scalar, len(categories))
      self.assertEqual(topDown.encoding.sum(), bitsOn)

      # empty field
      empty = s.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
      self.assertEqual(empty.sum(), 0)
      self.assertEqual(empty.shape, (fieldWidth,))

      # make sure it can still be decoded after a change
      # (flip one randomly chosen bit of the encoding)
      bit = s.random.randint(0, s.getWidth() - 1)
      us[bit] = 1 - us[bit]
      x = s.decode(us)
      self.assertEqual(x[0]["foo"][1], "US")

      # add two reps together
      newrep = ((us + unknown) > 0).astype('uint8')
      x = s.decode(newrep)
      name = x[0]["foo"][1]
      if name not in ("US <UNKNOWN>", "<UNKNOWN> US"):
        # Identify the unexpected category so the failure message can show
        # how strongly each candidate overlaps the combined representation.
        othercategory = name.replace("US", "")
        othercategory = othercategory.replace("<UNKNOWN>", "")
        othercategory = othercategory.replace(" ", "")
        otherencoded = s.encode(othercategory)
        self.fail(
            "Decoding failure\n"
            "Got: %s instead of US/unknown\n"
            "US: %s\n"
            "unknown: %s\n"
            "Sum: %s\n"
            "%s: %s\n"
            "Matches with US: %d\n"
            "Matches with unknown: %d\n"
            "Matches with %s: %d" % (
                name, us, unknown, newrep,
                othercategory, otherencoded,
                (us * newrep).sum(),
                (unknown * newrep).sum(),
                othercategory, (otherencoded * newrep).sum()))

      # serialization
      import cPickle as pickle
      t = pickle.loads(pickle.dumps(s))
      self.assertTrue((t.encode("ES") == es).all())
      self.assertTrue((t.encode("GB") == s.encode("GB")).all())

      # Test autogrow
      s = SDRCategoryEncoder(n=fieldWidth, w=bitsOn, categoryList=None,
                             name="bar", forced=True)

      es = s.encode("ES")
      self.assertEqual(es.shape, (fieldWidth,))
      self.assertEqual(es.sum(), bitsOn)
      x = s.decode(es)
      self.assertIsInstance(x[0], dict)
      self.assertIn("bar", x[0])
      self.assertEqual(x[0]["bar"][1], "ES")

      us = s.encode("US")
      self.assertEqual(us.shape, (fieldWidth,))
      self.assertEqual(us.sum(), bitsOn)
      x = s.decode(us)
      self.assertEqual(x[0]["bar"][1], "US")

      # Re-encoding a category that was already seen must be stable.
      es2 = s.encode("ES")
      self.assertTrue((es2 == es).all())

      us2 = s.encode("US")
      self.assertTrue((us2 == us).all())

      # make sure it can still be decoded after a change
      bit = s.random.randint(0, s.getWidth() - 1)
      us[bit] = 1 - us[bit]
      x = s.decode(us)
      self.assertEqual(x[0]["bar"][1], "US")

      # add two reps together
      newrep = ((us + es) > 0).astype('uint8')
      x = s.decode(newrep)
      name = x[0]["bar"][1]
      self.assertIn(name, ("US ES", "ES US"))

      # Catch duplicate categories: constructing an encoder from a list that
      # repeats "ES" must raise, and the test fails if it does not.
      newcategories = categories[:]
      self.assertIn("ES", newcategories)
      newcategories.append("ES")
      with self.assertRaises(RuntimeError):
        SDRCategoryEncoder(n=fieldWidth, w=bitsOn,
                           categoryList=newcategories, name="foo",
                           forced=True)
Beispiel #2
0
    def testSDRCategoryEncoder(self):
        """Exercise SDRCategoryEncoder end to end.

        Covers encode/decode round trips, top-down compute, bucket lookups,
        the unknown and missing-value inputs, decoding after a single flipped
        bit, decoding the union of two encodings, a pickle round trip,
        encoding without a predefined category list, and rejection of a
        category list that contains duplicates.
        """
        # make sure we have > 16 categories so that we have to grow our sdrs
        categories = [
            "ES", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10",
            "S11", "S12", "S13", "S14", "S15", "S16", "S17", "S18", "S19",
            "GB", "US"
        ]

        fieldWidth = 100
        bitsOn = 10

        s = SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=categories,
                               name="foo",
                               verbosity=0)

        # internal check
        self.assertEqual(s.sdrs.shape, (32, fieldWidth))

        # ES
        es = s.encode("ES")
        self.assertEqual(es.sum(), bitsOn)
        self.assertEqual(es.shape, (fieldWidth, ))

        x = s.decode(es)
        self.assertIsInstance(x[0], dict)
        self.assertIn("foo", x[0])
        self.assertEqual(x[0]["foo"][1], "ES")

        topDown = s.topDownCompute(es)
        self.assertEqual(topDown.value, "ES")
        self.assertEqual(topDown.scalar, 1)
        self.assertEqual(topDown.encoding.sum(), bitsOn)

        # ----------------------------------------------------------------------
        # Test topdown compute
        for v in categories:
            output = s.encode(v)
            topDown = s.topDownCompute(output)
            self.assertEqual(topDown.value, v)
            self.assertEqual(topDown.scalar, s.getScalars(v)[0])

            # The bucket-based lookup must agree with the top-down result.
            bucketIndices = s.getBucketIndices(v)
            topDown = s.getBucketInfo(bucketIndices)[0]
            self.assertEqual(topDown.value, v)
            self.assertEqual(topDown.scalar, s.getScalars(v)[0])
            self.assertTrue((topDown.encoding == output).all())
            self.assertEqual(topDown.value,
                             s.getBucketValues()[bucketIndices[0]])

        # Unknown
        unknown = s.encode("ASDFLKJLK")
        self.assertEqual(unknown.sum(), bitsOn)
        self.assertEqual(unknown.shape, (fieldWidth, ))
        x = s.decode(unknown)
        self.assertEqual(x[0]["foo"][1], "<UNKNOWN>")

        topDown = s.topDownCompute(unknown)
        self.assertEqual(topDown.value, "<UNKNOWN>")
        self.assertEqual(topDown.scalar, 0)

        # US
        us = s.encode("US")
        self.assertEqual(us.sum(), bitsOn)
        self.assertEqual(us.shape, (fieldWidth, ))
        x = s.decode(us)
        self.assertEqual(x[0]["foo"][1], "US")

        topDown = s.topDownCompute(us)
        self.assertEqual(topDown.value, "US")
        self.assertEqual(topDown.scalar, len(categories))
        self.assertEqual(topDown.encoding.sum(), bitsOn)

        # empty field
        empty = s.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)
        self.assertEqual(empty.shape, (fieldWidth, ))

        # make sure it can still be decoded after a change
        # (flip one randomly chosen bit of the encoding)
        bit = s.random.randint(0, s.getWidth() - 1)
        us[bit] = 1 - us[bit]
        x = s.decode(us)
        self.assertEqual(x[0]["foo"][1], "US")

        # add two reps together
        newrep = ((us + unknown) > 0).astype('uint8')
        x = s.decode(newrep)
        name = x[0]["foo"][1]
        if name not in ("US <UNKNOWN>", "<UNKNOWN> US"):
            # Identify the unexpected category so the failure message can
            # show how strongly each candidate overlaps the combined
            # representation.
            othercategory = name.replace("US", "")
            othercategory = othercategory.replace("<UNKNOWN>", "")
            othercategory = othercategory.replace(" ", "")
            otherencoded = s.encode(othercategory)
            self.fail(
                "Decoding failure\n"
                "Got: %s instead of US/unknown\n"
                "US: %s\n"
                "unknown: %s\n"
                "Sum: %s\n"
                "%s: %s\n"
                "Matches with US: %d\n"
                "Matches with unknown: %d\n"
                "Matches with %s: %d" % (
                    name, us, unknown, newrep,
                    othercategory, otherencoded,
                    (us * newrep).sum(),
                    (unknown * newrep).sum(),
                    othercategory, (otherencoded * newrep).sum()))

        # serialization
        import cPickle as pickle
        t = pickle.loads(pickle.dumps(s))
        self.assertTrue((t.encode("ES") == es).all())
        self.assertTrue((t.encode("GB") == s.encode("GB")).all())

        # Test autogrow
        s = SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=None,
                               name="bar")

        es = s.encode("ES")
        self.assertEqual(es.shape, (fieldWidth, ))
        self.assertEqual(es.sum(), bitsOn)
        x = s.decode(es)
        self.assertIsInstance(x[0], dict)
        self.assertIn("bar", x[0])
        self.assertEqual(x[0]["bar"][1], "ES")

        us = s.encode("US")
        self.assertEqual(us.shape, (fieldWidth, ))
        self.assertEqual(us.sum(), bitsOn)
        x = s.decode(us)
        self.assertEqual(x[0]["bar"][1], "US")

        # Re-encoding a category that was already seen must be stable.
        es2 = s.encode("ES")
        self.assertTrue((es2 == es).all())

        us2 = s.encode("US")
        self.assertTrue((us2 == us).all())

        # make sure it can still be decoded after a change
        bit = s.random.randint(0, s.getWidth() - 1)
        us[bit] = 1 - us[bit]
        x = s.decode(us)
        self.assertEqual(x[0]["bar"][1], "US")

        # add two reps together
        newrep = ((us + es) > 0).astype('uint8')
        x = s.decode(newrep)
        name = x[0]["bar"][1]
        self.assertIn(name, ("US ES", "ES US"))

        # Catch duplicate categories: constructing an encoder from a list
        # that repeats "ES" must raise, and the test fails if it does not.
        newcategories = categories[:]
        self.assertIn("ES", newcategories)
        newcategories.append("ES")
        with self.assertRaises(RuntimeError):
            SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=newcategories,
                               name="foo")
    def testSDRCategoryEncoder(self):
        """Exercise SDRCategoryEncoder end to end.

        Covers encode/decode round trips, top-down compute, bucket lookups,
        the unknown and missing-value inputs, decoding after a single flipped
        bit, decoding the union of two encodings, a pickle round trip,
        encoding without a predefined category list, and rejection of a
        category list that contains duplicates.
        """
        # make sure we have > 16 categories so that we have to grow our sdrs
        categories = [
            "ES", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10",
            "S11", "S12", "S13", "S14", "S15", "S16", "S17", "S18", "S19",
            "GB", "US"
        ]

        fieldWidth = 100
        bitsOn = 10

        s = SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=categories,
                               name="foo",
                               verbosity=0,
                               forced=True)

        # internal check
        self.assertEqual(s.sdrs.shape, (32, fieldWidth))

        # ES
        es = s.encode("ES")
        self.assertEqual(es.sum(), bitsOn)
        self.assertEqual(es.shape, (fieldWidth, ))

        x = s.decode(es)
        self.assertIsInstance(x[0], dict)
        self.assertIn("foo", x[0])
        self.assertEqual(x[0]["foo"][1], "ES")

        topDown = s.topDownCompute(es)
        self.assertEqual(topDown.value, "ES")
        self.assertEqual(topDown.scalar, 1)
        self.assertEqual(topDown.encoding.sum(), bitsOn)

        # ----------------------------------------------------------------------
        # Test topdown compute
        for v in categories:
            output = s.encode(v)
            topDown = s.topDownCompute(output)
            self.assertEqual(topDown.value, v)
            self.assertEqual(topDown.scalar, s.getScalars(v)[0])

            # The bucket-based lookup must agree with the top-down result.
            bucketIndices = s.getBucketIndices(v)
            topDown = s.getBucketInfo(bucketIndices)[0]
            self.assertEqual(topDown.value, v)
            self.assertEqual(topDown.scalar, s.getScalars(v)[0])
            self.assertTrue(numpy.array_equal(topDown.encoding, output))
            self.assertEqual(topDown.value,
                             s.getBucketValues()[bucketIndices[0]])

        # Unknown
        unknown = s.encode("ASDFLKJLK")
        self.assertEqual(unknown.sum(), bitsOn)
        self.assertEqual(unknown.shape, (fieldWidth, ))
        x = s.decode(unknown)
        self.assertEqual(x[0]["foo"][1], "<UNKNOWN>")

        topDown = s.topDownCompute(unknown)
        self.assertEqual(topDown.value, "<UNKNOWN>")
        self.assertEqual(topDown.scalar, 0)

        # US
        us = s.encode("US")
        self.assertEqual(us.sum(), bitsOn)
        self.assertEqual(us.shape, (fieldWidth, ))
        x = s.decode(us)
        self.assertEqual(x[0]["foo"][1], "US")

        topDown = s.topDownCompute(us)
        self.assertEqual(topDown.value, "US")
        self.assertEqual(topDown.scalar, len(categories))
        self.assertEqual(topDown.encoding.sum(), bitsOn)

        # empty field
        empty = s.encode(SENTINEL_VALUE_FOR_MISSING_DATA)
        self.assertEqual(empty.sum(), 0)
        self.assertEqual(empty.shape, (fieldWidth, ))

        # make sure it can still be decoded after a change
        # (flip one randomly chosen bit of the encoding)
        bit = s.random.getUInt32(s.getWidth() - 1)
        us[bit] = 1 - us[bit]
        x = s.decode(us)
        self.assertEqual(x[0]["foo"][1], "US")

        # add two reps together
        newrep = ((us + unknown) > 0).astype(numpy.uint8)
        x = s.decode(newrep)
        name = x[0]["foo"][1]
        if name not in ("US <UNKNOWN>", "<UNKNOWN> US"):
            # Report through the test framework, including what was decoded,
            # so this shows up as a test failure rather than an error.
            self.fail("Decoding failure: got %r instead of US/unknown"
                      % (name, ))

        # serialization
        # TODO: Remove pickle-based serialization tests -- issues #1419 and #1420
        import cPickle as pickle
        t = pickle.loads(pickle.dumps(s))
        self.assertTrue(numpy.array_equal(t.encode("ES"), es))
        self.assertTrue(numpy.array_equal(t.encode("GB"), s.encode("GB")))

        # Test autogrow
        s = SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=None,
                               name="bar",
                               forced=True)

        es = s.encode("ES")
        self.assertEqual(es.shape, (fieldWidth, ))
        self.assertEqual(es.sum(), bitsOn)
        x = s.decode(es)
        self.assertIsInstance(x[0], dict)
        self.assertIn("bar", x[0])
        self.assertEqual(x[0]["bar"][1], "ES")

        us = s.encode("US")
        self.assertEqual(us.shape, (fieldWidth, ))
        self.assertEqual(us.sum(), bitsOn)
        x = s.decode(us)
        self.assertEqual(x[0]["bar"][1], "US")

        # Re-encoding a category that was already seen must be stable.
        es2 = s.encode("ES")
        self.assertTrue(numpy.array_equal(es2, es))

        us2 = s.encode("US")
        self.assertTrue(numpy.array_equal(us2, us))

        # make sure it can still be decoded after a change
        bit = s.random.getUInt32(s.getWidth() - 1)
        us[bit] = 1 - us[bit]
        x = s.decode(us)
        self.assertEqual(x[0]["bar"][1], "US")

        # add two reps together
        newrep = ((us + es) > 0).astype(numpy.uint8)
        x = s.decode(newrep)
        name = x[0]["bar"][1]
        self.assertIn(name, ("US ES", "ES US"))

        # Catch duplicate categories: constructing an encoder from a list
        # that repeats "ES" must raise, and the test fails if it does not.
        newcategories = categories[:]
        self.assertIn("ES", newcategories)
        newcategories.append("ES")
        with self.assertRaises(RuntimeError):
            SDRCategoryEncoder(n=fieldWidth,
                               w=bitsOn,
                               categoryList=newcategories,
                               name="foo",
                               forced=True)