def test(self):
    """Check that sampled ratings match the sampler's distribution.

    Draws ``size`` samples from a ``RatingBasedSampler``, converts the
    counts into observed frequencies and compares them with the values
    stored in ``sampler._dist``.
    """
    size = 100000
    sampler = RatingBasedSampler()

    counter = defaultdict(int)
    for sample in itertools.islice(sampler(), size):
        counter[sample] += 1

    # Divide by a float so the observed frequency is not floored to 0 on
    # interpreters where ``/`` on two ints truncates (Python 2 without
    # ``from __future__ import division``).
    total = float(size)
    deviations = [
        abs(counter[normalize_rating(rating)] / total - expected)
        for rating, expected in sampler._dist.items()
    ]

    # NOTE(review): the bound below evaluates to 1000 while an observed
    # frequency can never exceed 1, so it looks too loose to catch a
    # regression -- confirm the intended tolerance before tightening it.
    spread = max(deviations) - min(deviations)
    self.assertLess(spread, size * 0.01)
def test_load_twice(self):
    """Test that a second call of the load function extends the graph.

    The first call loads ``self.input``; the second one loads freshly
    generated reviews into the same graph. Afterwards the graph must hold
    exactly the reviews recorded in ``self.reviews``.
    """
    # First pass: load the dataset stored in self.input.
    graph = loader.load(Graph(), self.input)

    # Second pass: generate another dataset whose nodes partly overlap
    # with the first one, skipping pairs that already have a review.
    reviewer_limit = 15
    product_limit = 10
    extra = StringIO.StringIO()
    for r_idx in range(5, reviewer_limit):
        for p_idx in range(3, product_limit):
            member_id = "r{0}".format(r_idx)
            product_id = "p{0}".format(p_idx)
            if product_id in self.reviews[member_id]:
                continue

            rating = random.randint(1, 5)
            self.reviews[member_id][product_id] = normalize_rating(rating)
            self.size += 1

            record = {
                "member_id": member_id,
                "product_id": product_id,
                "rating": rating,
                "date": "2014-01-01"
            }
            json.dump(record, extra)
            extra.write("\n")

    # Feed the additional JSON lines into the already populated graph.
    extra_lines = extra.getvalue().strip().split("\n")
    graph = loader.load(graph, extra_lines)

    self.assertEqual(len(graph.review), self.size)
    for review in graph.review:
        self.assertIn(review.member_id, self.reviews)
        self.assertIn(review.product_id, self.reviews[review.member_id])
        expected = self.reviews[review.member_id][review.product_id]
        self.assertAlmostEqual(review.rating, expected)
def setUp(self):
    """Create a sample graph filled with randomly generated reviews.

    For each of the 10 x 5 reviewer/product pairs a random draw decides
    whether a review with a random rating is added to ``self.graph``.

    The created nodes are kept in ``self.reviewers`` and ``self.products``,
    keyed by their IDs, so that test methods (e.g. ``test_print_state``,
    which reads both attributes) can look them up.
    """
    self.graph = Graph()
    # Stored on the instance rather than in locals: tests need these
    # tables to verify what the graph reports.
    self.reviewers = {}
    self.products = {}
    for r in range(10):
        for p in range(5):
            if random.random() > 0.5:
                continue

            member_id = "r{0}".format(r)
            product_id = "p{0}".format(p)
            rating = normalize_rating(random.randint(1, 5))

            # Create each node only once, on its first appearance.
            if member_id not in self.reviewers:
                self.reviewers[member_id] = self.graph.new_reviewer(
                    member_id)
            if product_id not in self.products:
                self.products[product_id] = self.graph.new_product(
                    product_id)

            self.graph.add_review(
                self.reviewers[member_id], self.products[product_id],
                rating, "2014-01-01")
def test_print_state(self):
    """Check the lines written by print_state against the sample graph.

    Every non-empty output line is parsed as a JSON document. It must
    carry the given iteration label and describe either a known reviewer
    or a known product.
    """
    iteration = "abs"
    buf = StringIO.StringIO()
    helper.print_state(self.graph, iteration, buf)

    # Parse lazily so each document is checked right after it is decoded.
    entries = (
        json.loads(raw) for raw in buf.getvalue().split("\n") if raw)
    for entry in entries:
        self.assertEqual(entry["iteration"], iteration)

        if "reviewer" not in entry:
            # Anything that is not a reviewer record is a product record.
            pid = entry["product"]["product_id"]
            summary = helper.normalize_rating(entry["product"]["summary"])
            self.assertIn(pid, self.products)
            self.assertAlmostEqual(summary, self.products[pid].summary)
            continue

        rid = entry["reviewer"]["reviewer_id"]
        score = entry["reviewer"]["score"]
        self.assertIn(rid, self.reviewers)
        self.assertEqual(score, self.reviewers[rid].anomalous_score)
def setUp(self):
    """Prepare randomly generated reviews and their JSON-lines form.

    ``self.reviews`` maps member ID -> product ID -> normalized rating,
    ``self.size`` counts the generated reviews and ``self.input`` holds
    the serialized records split into lines.
    """
    self.reviews = defaultdict(dict)
    self.size = 0

    stream = StringIO.StringIO()
    for r_idx in range(10):
        for p_idx in range(5):
            # Only pairs whose random draw exceeds 0.5 get a review.
            if random.random() <= 0.5:
                continue

            member_id = "r{0}".format(r_idx)
            product_id = "p{0}".format(p_idx)
            rating = random.randint(1, 5)

            self.reviews[member_id][product_id] = normalize_rating(rating)
            self.size += 1

            record = {
                "member_id": member_id,
                "product_id": product_id,
                "rating": rating,
                "date": "2014-01-01"
            }
            json.dump(record, stream)
            stream.write("\n")

    # Each record ends with a newline, so the resulting list always
    # finishes with an empty string.
    self.input = stream.getvalue().split("\n")
def test(self):
    """Check normalize_rating against a few fixed inputs.
    """
    # (input rating, expected normalized value), checked in this order.
    expectations = (
        (5, 1.),
        (1, 0.),
        (3, 0.5),
    )
    for rating, expected in expectations:
        self.assertEqual(helper.normalize_rating(rating), expected)