def test_convert_matrix(self):
    """Check model conversion for every input/output format combination.

    The model is loaded from each input format ('binary', 'json') and
    dumped to each output format ('binary', 'json', 'text').  For the
    'binary' and 'json' outputs the dump is loaded back and its header,
    user and system sections are compared with the source model.  The
    'text' output is only dumped; nothing is loaded back, so there is
    nothing to compare.
    """
    for in_fmt in ('binary', 'json'):
        for out_fmt in ('binary', 'json', 'text'):
            # input
            if in_fmt == 'binary':
                m = JubaModel.load_binary(_get_binary_file())
            else:
                m = JubaModel.load_json(_get_json_file())

            # output
            if out_fmt == 'binary':
                f = BytesIO()
                m.dump_binary(f)
                f.seek(0)
                m2 = JubaModel.load_binary(f)
            elif out_fmt == 'json':
                f = StringIO()
                m.dump_json(f)
                f.seek(0)
                m2 = JubaModel.load_json(f)
            elif out_fmt == 'text':
                # Only verifies that dumping as text does not raise.
                m.dump_text(StringIO())
                continue

            # check
            for section in ('header', 'user', 'system'):
                expected = list(getattr(m, section).get())
                actual = list(getattr(m2, section).get())
                # zip() stops at the shorter input, so without this length
                # check a converted model with missing trailing fields
                # would pass unnoticed.
                self.assertEqual(len(expected), len(actual))
                for ((k1, v1), (k2, v2)) in zip(expected, actual):
                    self.assertEqual(k1, k2)
                    self.assertEqual(v1, v2)
def _get_model(self, service, config):
    """Run ``service`` with ``config`` and return its saved model.

    The started server saves its model under the name 'test'; the path
    of the saved file is read from the server status and the file is
    loaded with ``JubaModel.load_binary``.  No other calls are made on
    the server before saving.  The server is always stopped afterwards,
    and the saved file is removed if it exists.
    """
    server = service.run(config)
    path = None
    try:
        server.save('test')
        # popitem() yields a (key, value) pair; the value carries the
        # 'last_saved_path' entry.
        status_entry = server.get_status().popitem()
        path = status_entry[1]['last_saved_path']
        with open(path, 'rb') as model_file:
            model = JubaModel.load_binary(model_file)
        return model
    finally:
        # Stop the server first, then clean up the file it wrote.
        server.stop()
        saved_file_exists = bool(path) and os.path.exists(path)
        if saved_file_exists:
            os.remove(path)
def test_binary(self):
    """Load a valid binary model with validation on and check its contents."""
    # Obtain a valid binary model file.
    binary_file = _get_binary_file()

    # The second argument enables validation: loading must succeed.
    model = JubaModel.load_binary(binary_file, True)
    header = model.header
    system = model.system
    user = model.user

    # Header: version numbers and checksum.
    self.assertEqual(1, header.jubatus_version_major)
    self.assertEqual(2, header.jubatus_version_minor)
    self.assertEqual(3, header.jubatus_version_maint)
    # must be a valid model after fix_header
    self.assertNotEqual(1, header.crc32)

    # System section.
    self.assertEqual(TEST_JSON['system']['config'], system.config)
    self.assertEqual('classifier', system.type)

    # User section.
    self.assertEqual(1, user.version)
    self.assertEqual(TEST_JSON['user']['user_data'], user.user_data)
    self.assertTrue(model._user_raw is not None)
def test_binary(self):
    """Verify the fields of a binary model loaded with validation enabled."""
    # Load a valid binary model file; passing True turns validation on,
    # so the load itself must succeed.
    loaded = JubaModel.load_binary(_get_binary_file(), True)

    # Expected header version, checked field by field in order.
    expected_version = (
        ('jubatus_version_major', 1),
        ('jubatus_version_minor', 2),
        ('jubatus_version_maint', 3),
    )
    for field_name, expected_value in expected_version:
        self.assertEqual(expected_value, getattr(loaded.header, field_name))

    # must be a valid model after fix_header
    self.assertNotEqual(1, loaded.header.crc32)

    expected_system = TEST_JSON['system']
    self.assertEqual(expected_system['config'], loaded.system.config)
    self.assertEqual('classifier', loaded.system.type)

    expected_user = TEST_JSON['user']
    self.assertEqual(1, loaded.user.version)
    self.assertEqual(expected_user['user_data'], loaded.user.user_data)

    # The raw user section must have been retained by the loader.
    self.assertFalse(loaded._user_raw is None)
This is an example to show the usage of ``jubakit.model`` package, which allows low-level model manipulation. To try this example, first save a model file of jubaweight. (hint: ``weight_shogun.py`` example automatically saves the model under /tmp) Then run this example like: $ python weight_model_extract.py /tmp/127.0.0.1_0000_weight_shogun.jubatus to see the term frequency of each feature vector. """ # Load the model file. modelpath = 'weight_shogun_model.jubatus' if 1 < len(sys.argv): modelpath = sys.argv[1] with open(modelpath, 'rb') as f: model = JubaModel.load_binary(f) # Extract the term frequency part of the model data. weights = model.data()[0][1][1][0] # Sort features by the term frequency. sorted_weights = sorted(weights.items(), key=operator.itemgetter(1), reverse=True) # Print the result. print("Weight\t\tFeature") for (k, v) in sorted_weights: print("{0}\t\t{1}".format(v, k))