def test_similar_row_from_datum_and_rate(self):
    """Check ``similar_row_from_datum_and_rate`` against an untrained recommender.

    Covers three things:

    * with an ID-only schema every result list is empty;
    * a ``rate`` of ``0.0`` or ``1.01`` raises ``ValueError``;
    * with a NUMBER-only schema the yielded row ID is ``None`` and the
      result list is empty.
    """
    filter_warning()
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails, so a failing
    # test does not leave the service behind.
    try:
        loader = StubLoader()

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for _, _, result in recommender.similar_row_from_datum_and_rate(dataset):
            self.assertEqual(0, len(result))

        # rate must be in (0, 1].
        # The call returns an iterable, so it has to be consumed for the
        # error to surface.
        for invalid_rate in (0.0, 1.01):
            with self.assertRaises(ValueError):
                for _ in recommender.similar_row_from_datum_and_rate(
                        dataset, rate=invalid_rate):
                    pass

        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        for _, row_id, result in recommender.similar_row_from_datum_and_rate(dataset):
            # The schema declares no ID column, so no row ID is reported.
            self.assertIsNone(row_id)
            # Nothing has been added to the model, so there is no similar row.
            self.assertEqual(0, len(result))
    finally:
        recommender.stop()
def test_similar_row_from_id_and_score(self):
    """Check ``similar_row_from_id_and_score`` against an untrained recommender.

    A dataset without an ID column must raise ``RuntimeError``; with an ID
    column each yielded row ID equals ``str(idx + 1)`` and the result list
    is empty.
    """
    filter_warning()
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        # The dataset must have an ID column to use
        # `similar_row_from_id_and_score`.
        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        with self.assertRaises(RuntimeError):
            # The call returns an iterable; consume it so the error surfaces.
            for _ in recommender.similar_row_from_id_and_score(dataset):
                pass

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for idx, row_id, result in recommender.similar_row_from_id_and_score(dataset):
            # Each row ID is expected to be the 1-based row position as a
            # string (presumably StubLoader yields v = 1, 2, 3, ... -- verify
            # against the loader).
            self.assertEqual(str(idx + 1), row_id)
            # Nothing has been added to the model, so there is no similar row.
            self.assertEqual(0, len(result))
    finally:
        recommender.stop()
def test_method_param(self):
    """Check which ``parameter`` entries ``Config`` provides for each method.

    * ``inverted_index`` has no ``parameter`` key at all;
    * ``minhash`` has ``hash_num``;
    * ``lsh`` has ``hash_num`` and ``threads``;
    * ``nearest_neighbor_recommender`` has ``method`` and a nested
      ``parameter`` mapping that contains ``threads`` and ``hash_num``.
    """
    # assertIn / assertNotIn report both operands on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertNotIn('parameter', Config(method='inverted_index'))

    self.assertIn('hash_num', Config(method='minhash')['parameter'])

    lsh_param = Config(method='lsh')['parameter']
    self.assertIn('hash_num', lsh_param)
    self.assertIn('threads', lsh_param)

    nn_param = Config(method='nearest_neighbor_recommender')['parameter']
    self.assertIn('method', nn_param)
    self.assertIn('parameter', nn_param)
    nested_param = nn_param['parameter']
    self.assertIn('threads', nested_param)
    self.assertIn('hash_num', nested_param)
def test_update_row(self):
    """Check ``update_row``.

    A dataset without an ID column must raise ``RuntimeError``; with an ID
    column every yielded result equals ``True``.
    """
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        # The dataset must have an ID column to use `update_row`.
        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        with self.assertRaises(RuntimeError):
            # The call returns an iterable; consume it so the error surfaces.
            for _ in recommender.update_row(dataset):
                pass

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for _, _, result in recommender.update_row(dataset):
            self.assertEqual(result, True)
    finally:
        recommender.stop()
def test_similar_row_from_datum(self):
    """Check ``similar_row_from_datum`` against an untrained recommender.

    With an ID-only schema every result list is empty; with a NUMBER-only
    schema the yielded row ID is ``None`` and the result list is empty.
    """
    filter_warning()
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for _, _, result in recommender.similar_row_from_datum(dataset):
            self.assertEqual(0, len(result))

        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        for _, row_id, result in recommender.similar_row_from_datum(dataset):
            # The schema declares no ID column, so no row ID is reported.
            self.assertIsNone(row_id)
            # Nothing has been added to the model, so there is no similar row.
            self.assertEqual(0, len(result))
    finally:
        recommender.stop()
def test_complete_row_from_datum(self):
    """Check ``complete_row_from_datum`` against an untrained recommender.

    For both an ID-only and a NUMBER-only schema the yielded datum must have
    empty ``string_values``, ``num_values`` and ``binary_values``; for the
    NUMBER-only schema the yielded row ID must also be ``None``.
    """
    filter_warning()
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for _, _, d in recommender.complete_row_from_datum(dataset):
            self.assertEqual(0, len(d.string_values))
            self.assertEqual(0, len(d.num_values))
            self.assertEqual(0, len(d.binary_values))

        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        for _, row_id, d in recommender.complete_row_from_datum(dataset):
            # The schema declares no ID column, so no row ID is reported.
            self.assertIsNone(row_id)
            self.assertEqual(0, len(d.string_values))
            self.assertEqual(0, len(d.num_values))
            self.assertEqual(0, len(d.binary_values))
    finally:
        recommender.stop()
def test_clear_row(self):
    """Check ``clear_row``.

    A dataset without an ID column must raise ``RuntimeError``; with an ID
    column every yielded result is asserted to equal ``True``.
    """
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        # The dataset must have an ID column to use `clear_row`.
        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        with self.assertRaises(RuntimeError):
            # The call returns an iterable; consume it so the error surfaces.
            for _ in recommender.clear_row(dataset):
                pass

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        # NOTE(review): the previous comment here said False is expected when
        # the table is empty, but the assertion requires True for every row.
        # The assertion is kept as is -- confirm the intended contract.
        for _, _, result in recommender.clear_row(dataset):
            self.assertEqual(result, True)
    finally:
        recommender.stop()
def test_complete_row_from_id(self):
    """Check ``complete_row_from_id`` against an untrained recommender.

    A dataset without an ID column must raise ``RuntimeError``; with an ID
    column the yielded datum must have empty ``string_values``,
    ``num_values`` and ``binary_values``.
    """
    filter_warning()
    recommender = Recommender.run(Config())
    # Always stop the recommender, even when an assertion fails.
    try:
        loader = StubLoader()

        # The dataset must have an ID column to use `complete_row_from_id`.
        schema = Schema({'v': Schema.NUMBER})
        dataset = Dataset(loader, schema)
        with self.assertRaises(RuntimeError):
            # The call returns an iterable; consume it so the error surfaces.
            for _ in recommender.complete_row_from_id(dataset):
                pass

        schema = Schema({'v': Schema.ID})
        dataset = Dataset(loader, schema)
        for _, _, d in recommender.complete_row_from_id(dataset):
            self.assertEqual(0, len(d.string_values))
            self.assertEqual(0, len(d.num_values))
            self.assertEqual(0, len(d.binary_values))
    finally:
        recommender.stop()
def test_embedded(self):
    """Smoke test: ``Recommender.run`` accepts ``embedded=True``.

    There are no assertions; the test passes as long as the call does not
    raise.
    """
    default_config = Config()
    embedded_recommender = Recommender.run(default_config, embedded=True)
def test_default(self):
    """``Config.default()`` must select the ``lsh`` method."""
    default_method = Config.default()['method']
    self.assertEqual('lsh', default_method)
def test_methods(self):
    """``Config.methods()`` must return a ``list``."""
    config = Config()
    # assertIsInstance reports the offending object and the expected type on
    # failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(config.methods(), list)
def test_simple(self):
    """A ``Config`` built with no arguments must use the ``lsh`` method."""
    selected_method = Config()['method']
    self.assertEqual('lsh', selected_method)
from jubakit.loader.csv import CSVLoader

# Read the input rows from a CSV file.
loader = CSVLoader('npb.csv')

# Declare the column types: 'name' is the row ID and 'team' is a string.
# Schema.NUMBER is passed as the second argument (presumably the type used
# for every column not listed explicitly -- confirm against jubakit).
schema = Schema(
    {
        'name': Schema.ID,
        'team': Schema.STRING,
    },
    Schema.NUMBER,
)

# Bind the loader and the schema together as a Dataset.
dataset = Dataset(loader, schema)

# Start a Recommender service configured with the 'lsh' method.
cfg = Config(method='lsh')
recommender = Recommender.run(cfg)

# Feed every row of the dataset into the recommender model.  The iterable has
# to be consumed for the updates to happen; the yielded values are not used.
for (idx, row_id, success) in recommender.update_row(dataset):
    pass

# Query the model by row ID and print, for every tenth row, the row itself
# followed by two of its similar items.
separator = '-' * 60
print('{0}\n recommend similar players from row-id \n{1}'.format(
    separator, separator))
for (idx, row_id, result) in recommender.similar_row_from_id(dataset, size=3):
    # Report only every tenth row.
    if idx % 10 != 0:
        continue
    print(
        'player {0} is similar to : {1} (score:{2:.3f}), {3} (score:{4:.3f})'
        .format(result[0].id, result[1].id, result[1].score,
                result[2].id, result[2].score))