def make_troia_job(job, gold_samples=None):
    """
        Creates Troia job corresponding to passed Job object.

        :param job: Job object the Troia job is created for. It is passed to
            ``make_job_id`` and stored on the created ``TroiaJob`` row.
        :param gold_samples: gold labels forwarded to
            ``TroiaClient.load_gold_labels``. When omitted (or ``None``) an
            empty list is sent.
    """
    # Build the default per call: a literal ``[]`` in the signature is a
    # single list object shared by every invocation of the function.
    if gold_samples is None:
        gold_samples = []

    client = TroiaClient(settings.TROIA_HOST, None)
    job_id = make_job_id(job=job)
    TroiaJob.objects.create(job=job, troia_id=job_id)

    # Reset the id on the Troia side before any data is loaded under it.
    client.reset(job_id)
    # NOTE(review): ``cost_matrix`` is neither a parameter nor a local of
    # this function; presumably it is a module-level name -- confirm.
    client.load_categories(cost_matrix, job_id)
    client.load_gold_labels(gold_samples, job_id)
def run_simulation(opts):
    """
        Sends randomly chosen gold labels and worker labels to Troia.

        :param opts: options object. ``opts.ID`` is the job id, ``opts.it``
            the number of iterations and ``opts.gratio`` the threshold that
            ``random.random()`` is compared with to decide whether an
            iteration sends a gold label. ``opts`` is also handed to
            ``gen_items``.
    """
    client = TroiaClient(url, None)
    client.reset(opts.ID)

    # Two categories: each costs 0. against itself and 1. against the other.
    categories = [
        (labels[idx], {labels[idx]: 0., labels[1 - idx]: 1.})
        for idx in xrange(2)
    ]
    client.load_categories(categories, opts.ID)

    workers, objects, golds = gen_items(opts)

    window_start = time.time()
    with_progress = build_progressbar("Simulation iterations")
    for i in with_progress(xrange(opts.it)):
        # Every DUMP_FREQ iterations report the speed of the last window
        # and start measuring a new one.
        if (i + 1) % DUMP_FREQ == 0:
            now = time.time()
            elapsed = now - window_start
            speed = str(float(DUMP_FREQ) / elapsed)
            log.info(
                "Average speed: %s (labels/sec) assigned labels: %s ",
                speed, str(i))
            window_start = now

        draw = random.random()
        if draw < opts.gratio and golds:
            # Gold objects are always given the second label.
            client.load_gold_label(random.choice(golds), labels[1], opts.ID)
        else:
            target = random.choice(objects)
            worker = random.choice(workers)
            label = random.choice(labels)
            client.load_worker_assigned_label(worker, target, label, opts.ID)
def setUp(self):
    """
        Creates the client for ``self.base_url``, fixes the job id to "42"
        and loads the categories returned by ``self._labels()`` under it.
    """
    client = TroiaClient(self.base_url)
    self.tc = client
    self.ID = "42"
    categories = self._labels()
    client.load_categories(categories, self.ID)
class TestClient(unittest.TestCase):
    """
        Tests of TroiaClient that talk to the service at ``base_url``.

        ``setUp`` loads the test categories under ``self.ID`` and
        ``tearDown`` resets that id again, so each test starts from a job
        that only contains the categories.
    """

    base_url = "http://localhost:8080/GetAnotherLabel/rest/"
    labels = [u"dog", "cat", "pig"]
    objects = [u"o_dog_0", u"o_dog_1", "o_cat_0", "o_pig_0", "o_pig_1"]
    prior = .314
    miss_class_cost = .141

    # (worker, object, label) triples: nine workers label the first object
    # "dog" and a tenth one labels it "pig".
    assigned_labels = \
        [("worker" + str(i), objects[0], "dog") for i in xrange(9)] + \
        [("worker9", objects[0], "pig")]

    def _misclass_cost(self, label):
        """
            Returns the cost row of ``label``: ``miss_class_cost`` for every
            other label and 0. for ``label`` itself.
        """
        costs = {other: self.miss_class_cost
                 for other in self.labels if other != label}
        costs[label] = 0.
        return costs

    def _labels(self):
        """Returns ``(label, cost row)`` pairs for all test labels."""
        return [(l, self._misclass_cost(l)) for l in self.labels]

    def setUp(self):
        self.tc = TroiaClient(self.base_url)
        self.ID = "42"
        self.tc.load_categories(self._labels(), self.ID)

    def tearDown(self):
        self.tc.reset(self.ID)

    def testSetUp(self):
        """The job loaded in setUp exists, an unrelated id does not."""
        self.assertTrue(self.tc.exists(self.ID))
        self.assertFalse(self.tc.exists(self.ID + "0"))

    def testPing(self):
        """The ping reply starts with the expected prefix."""
        r = self.tc.ping()
        prefix = "\"processing request at: "
        # The rest of the reply is a date in a format that is awkward to
        # parse, so only the prefix is checked.
        self.assertTrue(r.startswith(prefix))

    def testReset(self):
        """After a reset the job no longer exists."""
        r = self.tc.reset(self.ID)
        self.assertTrue(r.startswith("nullified the ds object"))
        self.assertFalse(self.tc.exists(self.ID))

    def testExists(self):
        self.assertFalse(self.tc.exists(self.ID + "0"))
        self.assertTrue(self.tc.exists(self.ID))

    def testLoadCategories(self):
        """Loaded categories come back with their names and costs."""
        ID = self.ID + "AnotherTestID"
        self.tc.reset(ID)
        try:
            self.assertFalse(self.tc.exists(ID))
            self.tc.load_categories(self._labels(), ID)
            self.assertTrue(self.tc.exists(ID))
            r = self.tc.get_dawid_skene(ID)
            for k, v in r['categories'].iteritems():
                self.assertIn(k, self.labels)
                self.assertEqual(v['name'], k)
                for n, c in v['misclassification_cost'].iteritems():
                    print("%s %s %s %s" % (n, c, k, v))
                    if n == k:
                        self.assertEqual(0., c)
                    else:
                        self.assertEqual(self.miss_class_cost, c)
        finally:
            # This id is not covered by tearDown, so reset it even when an
            # assertion above fails.
            self.tc.reset(ID)

    def testLoadCosts(self):
        """A single loaded cost overrides the value from setUp."""
        self.tc.load_costs([(u"dog", "cat", 0.511)], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        self.assertEqual(
            0.511,
            r['categories'][u'dog']['misclassification_cost']['cat'])

    def testAssignLabel(self):
        """A single assigned label shows up on its worker."""
        job = self.assigned_labels[0]
        self.tc.load_worker_assigned_label(job[0], job[1], job[2], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        resp = r['workers'][job[0]]['labels'][0]
        print(job)
        # The response is a dict, so compare the values regardless of order.
        self.assertEqual(sorted(job), sorted(resp.values()))

    def testAssignLabels(self):
        """Labels loaded in bulk show up on their workers."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['workers']
        for job in self.assigned_labels:
            resp = r[job[0]]['labels'][0]
            self.assertEqual(sorted(job), sorted(resp.values()))

    def testGoldLabel(self):
        """A single gold label is stored and flagged as gold."""
        item = (self.objects[0], u"dog")
        self.tc.load_gold_label(item[0], item[1], self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        self.assertEqual(item[1], r[item[0]]["correctCategory"])
        self.assertTrue(r[item[0]]["isGold"])

    def testGoldLabels(self):
        """Gold labels loaded in bulk are stored and flagged as gold."""
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        for item in gold_labels:
            self.assertTrue(r[item[0]]["isGold"])
            self.assertEqual(item[1], r[item[0]]["correctCategory"])

    def testMajorityVote(self):
        """Unfinished: prints the responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in xrange(10):
            r = self.tc.majority_vote(self.objects[0], self.ID)
            print(r)
            # Assertion that was left disabled:
            # self.assertEqual(self.objects[0].split("_")[1], r)
        self.fail()

    def testMajorityVotes(self):
        """Unfinished: prints the responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in xrange(10):
            r = self.tc.majority_votes(self.ID)
            print(r)
        self.fail()

    def testComputeBlocking(self):
        """Unfinished: prints both responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r1 = self.tc.compute_blocking(1, self.ID)
        r3 = self.tc.compute_blocking(3, self.ID)
        print("%s %s" % (r1, r3))
        self.fail()

    def testPrintWorkerSummary(self):
        """The summary has one section per worker that assigned a label."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(10, self.ID)
        r = self.tc.print_worker_summary(True, self.ID)
        self.assertEqual(10, r.count("Confusion Matrix:"))
        self.assertEqual(10, r.count("Number of Annotations: 1"))
        for l in self.assigned_labels:
            self.assertEqual(1, r.count("Worker: " + l[0]))

    def testPrintObjectsProbs(self):
        """Unfinished: prints the response and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.print_objects_probs([], self.ID)
        print(r)
        self.fail()

    def testObjectProbs(self):
        """The label probabilities of an object sum up to 1."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.object_probs(self.objects[0], self.ID)
        from math import fsum
        self.assertAlmostEqual(1., fsum(r.values()), 10)

    def testPrintPriors(self):
        """Only checks that the call succeeds and returns text lines."""
        r = self.tc.print_priors(self.ID)
        for l in r.splitlines():
            # We don't support priors, so this check stays disabled:
            # self.assertIn(str(self.prior), l)
            pass

    def testClassPriors(self):
        """Only checks that the call succeeds and returns a JSON mapping."""
        r = self.tc.class_priors(self.ID)
        r = json.loads(r)
        for _, v in r.iteritems():
            # We don't support priors, so this check stays disabled:
            # self.assertEqual(self.prior, v)
            pass

    def testGetDawidSkene(self):
        """The full dump has the expected top-level keys and workers."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        # Gold labels are (object, label) pairs, as in testGoldLabels.
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        for k in ["workers", "objects", "id", "fixedPriors", "categories"]:
            self.assertIn(k, r)
        for l in self.assigned_labels:
            self.assertIn(l[0], r["workers"])
def run_troia_on_data(cost_matrix, gold_samples, workers_labels):
    """
        Runs one Troia job on the given data and prints the outcome.

        :param cost_matrix: cost matrix containing all labels / categories.
        :param gold_samples: ``(object_id, label)`` pairs for objects whose
            correct label is known.
        :param workers_labels: ``(worker, object_id, label)`` triples with
            the labels that workers assigned to objects.

        Prints the label probabilities of every object followed by the
        workers summary; nothing is returned.
    """
    client = TroiaClient(TROIA_SERVER_URL, TIMEOUT)
    job_id = TUTORIAL_JOB_ID

    # Reset first so that old data stored under this id does not interfere.
    client.reset(job_id)

    client.load_categories(cost_matrix, job_id)

    # The gold samples could also be sent one at a time:
    #     for object_id, label in gold_samples:
    #         client.load_gold_label(object_id, label, job_id)
    client.load_gold_labels(gold_samples, job_id)

    # Likewise the worker labels could be sent one at a time:
    #     for worker, object_id, label in workers_labels:
    #         client.load_worker_assigned_label(
    #             worker, object_id, label, job_id)
    client.load_worker_assigned_labels(workers_labels, job_id)

    # Start the actual calculations, then collect results and stats.
    client.compute_blocking(ITERATIONS, job_id)
    results = client.get_dawid_skene(job_id)
    # ``pprint.pprint(results)`` would print all of it with some formatting.

    # Label distribution of every object.
    per_object = [
        (name, info['categoryProbability'])
        for name, info in results['objects'].items()
    ]
    print("Object label probabilities:")
    pprint.pprint(sorted(per_object))

    # Data related to workers.
    summary = str(client.print_worker_summary(True, job_id))
    print("")
    print("Workers summary:")
    print(summary)
class TestClient(unittest.TestCase):
    """
        Tests of TroiaClient that talk to the service at ``base_url``.

        ``setUp`` loads the test categories under ``self.ID`` and
        ``tearDown`` resets that id again, so each test starts from a job
        that only contains the categories.
    """

    base_url = "http://localhost:8080/GetAnotherLabel/rest/"
    labels = [u"dog", "cat", "pig"]
    objects = [u"o_dog_0", u"o_dog_1", "o_cat_0", "o_pig_0", "o_pig_1"]
    prior = .314
    miss_class_cost = .141

    # (worker, object, label) triples: nine workers label the first object
    # "dog" and a tenth one labels it "pig".
    assigned_labels = \
        [("worker" + str(i), objects[0], "dog") for i in xrange(9)] + \
        [("worker9", objects[0], "pig")]

    def _misclass_cost(self, label):
        """
            Returns the cost row of ``label``: ``miss_class_cost`` for every
            other label and 0. for ``label`` itself.
        """
        costs = {other: self.miss_class_cost
                 for other in self.labels if other != label}
        costs[label] = 0.
        return costs

    def _labels(self):
        """Returns ``(label, cost row)`` pairs for all test labels."""
        return [(l, self._misclass_cost(l)) for l in self.labels]

    def setUp(self):
        self.tc = TroiaClient(self.base_url)
        self.ID = "42"
        self.tc.load_categories(self._labels(), self.ID)

    def tearDown(self):
        self.tc.reset(self.ID)

    def testSetUp(self):
        """The job loaded in setUp exists, an unrelated id does not."""
        self.assertTrue(self.tc.exists(self.ID))
        self.assertFalse(self.tc.exists(self.ID + "0"))

    def testPing(self):
        """The ping reply starts with the expected prefix."""
        r = self.tc.ping()
        prefix = "\"processing request at: "
        # The rest of the reply is a date in a format that is awkward to
        # parse, so only the prefix is checked.
        self.assertTrue(r.startswith(prefix))

    def testReset(self):
        """After a reset the job no longer exists."""
        r = self.tc.reset(self.ID)
        self.assertTrue(r.startswith("nullified the ds object"))
        self.assertFalse(self.tc.exists(self.ID))

    def testExists(self):
        self.assertFalse(self.tc.exists(self.ID + "0"))
        self.assertTrue(self.tc.exists(self.ID))

    def testLoadCategories(self):
        """Loaded categories come back with their names and costs."""
        ID = self.ID + "AnotherTestID"
        self.tc.reset(ID)
        try:
            self.assertFalse(self.tc.exists(ID))
            self.tc.load_categories(self._labels(), ID)
            self.assertTrue(self.tc.exists(ID))
            r = self.tc.get_dawid_skene(ID)
            for k, v in r['categories'].iteritems():
                self.assertIn(k, self.labels)
                self.assertEqual(v['name'], k)
                for n, c in v['misclassification_cost'].iteritems():
                    print("%s %s %s %s" % (n, c, k, v))
                    if n == k:
                        self.assertEqual(0., c)
                    else:
                        self.assertEqual(self.miss_class_cost, c)
        finally:
            # This id is not covered by tearDown, so reset it even when an
            # assertion above fails.
            self.tc.reset(ID)

    def testLoadCosts(self):
        """A single loaded cost overrides the value from setUp."""
        self.tc.load_costs([(u"dog", "cat", 0.511)], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        self.assertEqual(
            0.511,
            r['categories'][u'dog']['misclassification_cost']['cat'])

    def testAssignLabel(self):
        """A single assigned label shows up on its worker."""
        job = self.assigned_labels[0]
        self.tc.load_worker_assigned_label(job[0], job[1], job[2], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        resp = r['workers'][job[0]]['labels'][0]
        print(job)
        # The response is a dict, so compare the values regardless of order.
        self.assertEqual(sorted(job), sorted(resp.values()))

    def testAssignLabels(self):
        """Labels loaded in bulk show up on their workers."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['workers']
        for job in self.assigned_labels:
            resp = r[job[0]]['labels'][0]
            self.assertEqual(sorted(job), sorted(resp.values()))

    def testGoldLabel(self):
        """A single gold label is stored and flagged as gold."""
        item = (self.objects[0], u"dog")
        self.tc.load_gold_label(item[0], item[1], self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        self.assertEqual(item[1], r[item[0]]["correctCategory"])
        self.assertTrue(r[item[0]]["isGold"])

    def testGoldLabels(self):
        """Gold labels loaded in bulk are stored and flagged as gold."""
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        for item in gold_labels:
            self.assertTrue(r[item[0]]["isGold"])
            self.assertEqual(item[1], r[item[0]]["correctCategory"])

    def testMajorityVote(self):
        """Unfinished: prints the responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in xrange(10):
            r = self.tc.majority_vote(self.objects[0], self.ID)
            print(r)
            # Assertion that was left disabled:
            # self.assertEqual(self.objects[0].split("_")[1], r)
        self.fail()

    def testMajorityVotes(self):
        """Unfinished: prints the responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in xrange(10):
            r = self.tc.majority_votes(self.ID)
            print(r)
        self.fail()

    def testComputeBlocking(self):
        """Unfinished: prints both responses and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r1 = self.tc.compute_blocking(1, self.ID)
        r3 = self.tc.compute_blocking(3, self.ID)
        print("%s %s" % (r1, r3))
        self.fail()

    def testPrintWorkerSummary(self):
        """The summary has one section per worker that assigned a label."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(10, self.ID)
        r = self.tc.print_worker_summary(True, self.ID)
        self.assertEqual(10, r.count("Confusion Matrix:"))
        self.assertEqual(10, r.count("Number of Annotations: 1"))
        for l in self.assigned_labels:
            self.assertEqual(1, r.count("Worker: " + l[0]))

    def testPrintObjectsProbs(self):
        """Unfinished: prints the response and then always fails."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.print_objects_probs([], self.ID)
        print(r)
        self.fail()

    def testObjectProbs(self):
        """The label probabilities of an object sum up to 1."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.object_probs(self.objects[0], self.ID)
        from math import fsum
        self.assertAlmostEqual(1., fsum(r.values()), 10)

    def testPrintPriors(self):
        """Only checks that the call succeeds and returns text lines."""
        r = self.tc.print_priors(self.ID)
        for l in r.splitlines():
            # We don't support priors, so this check stays disabled:
            # self.assertIn(str(self.prior), l)
            pass

    def testClassPriors(self):
        """Only checks that the call succeeds and returns a JSON mapping."""
        r = self.tc.class_priors(self.ID)
        r = json.loads(r)
        for _, v in r.iteritems():
            # We don't support priors, so this check stays disabled:
            # self.assertEqual(self.prior, v)
            pass

    def testGetDawidSkene(self):
        """The full dump has the expected top-level keys and workers."""
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        # Gold labels are (object, label) pairs, as in testGoldLabels.
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        for k in ["workers", "objects", "id", "fixedPriors", "categories"]:
            self.assertIn(k, r)
        for l in self.assigned_labels:
            self.assertIn(l[0], r["workers"])