Exemple #1
0
def make_troia_job(job, gold_samples=None):
    """
        Creates Troia job corresponding to passed Job object.

        A TroiaJob row linking ``job`` to the generated Troia id is created,
        the job is reset on the Troia server, and then the categories and the
        gold labels are uploaded.

        :param job: Job object; passed to ``make_job_id`` and stored on the
            new ``TroiaJob`` row.
        :param gold_samples: gold samples forwarded to
            ``client.load_gold_labels``. ``None`` (the default) means
            "no gold samples" and is sent as an empty list.
    """
    # A fresh list per call: a literal ``[]`` default would be one shared
    # object reused by every call that omits the argument.
    if gold_samples is None:
        gold_samples = []

    client = TroiaClient(settings.TROIA_HOST, None)
    job_id = make_job_id(job=job)
    TroiaJob.objects.create(job=job, troia_id=job_id)
    client.reset(job_id)
    # NOTE(review): cost_matrix is not defined in this function; presumably a
    # module-level name -- confirm it exists where this function lives.
    client.load_categories(cost_matrix, job_id)
    client.load_gold_labels(gold_samples, job_id)
Exemple #2
0
def run_simulation(opts):
    """Push randomly generated labels into a Troia job, logging throughput.

    The job ``opts.ID`` is reset and given two categories built from the
    module-level ``labels``: a label costs 0. against itself and 1. against
    the other one.  Then, for ``opts.it`` iterations, a random worker assigns
    a random label to a random object.  When the random draw is below
    ``opts.gratio`` and there are gold objects, the object is taken from the
    gold set and first loaded as a gold label with ``labels[1]``.

    Every ``DUMP_FREQ`` iterations the average speed since the previous
    report is logged.

    :param opts: options object; ``ID``, ``it`` and ``gratio`` are read here
        and the whole object is handed to ``gen_items``.
    """
    client = TroiaClient(url, None)
    client.reset(opts.ID)

    # One (label, cost row) pair per label.
    categories = []
    for idx in xrange(2):
        this_label, other_label = labels[idx], labels[1 - idx]
        categories.append((this_label, {this_label: 0., other_label: 1.}))
    client.load_categories(categories, opts.ID)

    workers, objects, golds = gen_items(opts)
    window_start = time.time()
    progress = build_progressbar("Simulation iterations")
    for step in progress(xrange(opts.it)):
        if (step + 1) % DUMP_FREQ == 0:
            now = time.time()
            speed = float(DUMP_FREQ) / (now - window_start)
            log.info("Average speed: %s (labels/sec)  assigned labels: %s         ",
                     str(speed), str(step))
            # The next report measures only the following window.
            window_start = now

        draw = random.random()
        use_gold = draw < opts.gratio and golds
        if not use_gold:
            target = random.choice(objects)
        else:
            target = random.choice(golds)
            client.load_gold_label(target, labels[1], opts.ID)

        # Worker is drawn before the label, in that order.
        worker = random.choice(workers)
        label = random.choice(labels)
        client.load_worker_assigned_label(worker, target, label, opts.ID)
Exemple #3
0
 def setUp(self):
     """Create the client and load the test categories into job "42"."""
     client = TroiaClient(self.base_url)
     self.tc = client
     self.ID = "42"
     client.load_categories(self._labels(), self.ID)
Exemple #4
0
class TestClient(unittest.TestCase):
    """Integration tests for TroiaClient against the server at base_url.

    setUp loads the test categories into job "42" and tearDown resets that
    job, so every test starts from the same state.  The tests talk to a real
    server; nothing here is mocked.
    """

    # Test methods carry "#" comments instead of docstrings: unittest prints
    # the first docstring line in place of the method name in verbose output.

    base_url = "http://localhost:8080/GetAnotherLabel/rest/"
    labels = [u"dog", "cat", "pig"]
    objects = [u"o_dog_0", u"o_dog_1", "o_cat_0", "o_pig_0", "o_pig_1"]
    prior = .314
    miss_class_cost = .141

    def _misclass_cost(self, label):
        """Return the cost row for label: 0. for itself, miss_class_cost for the rest."""
        costs = {other: self.miss_class_cost for other in self.labels}
        costs[label] = 0.
        return costs

    def _labels(self):
        """Return the (label, cost row) pairs that are passed to load_categories."""
        return [(label, self._misclass_cost(label)) for label in self.labels]

    # Nine workers vote "dog" and one votes "pig", all on the first object.
    # The object name comes in through the outermost iterable because a
    # class-level name is not visible inside a comprehension body on Python 3.
    assigned_labels = [("worker" + str(i), obj, "dog")
                       for obj in objects[:1] for i in range(9)]
    assigned_labels.append(("worker9", objects[0], "pig"))

    def setUp(self):
        """Create the client and load the test categories into job "42"."""
        self.tc = TroiaClient(self.base_url)
        self.ID = "42"
        self.tc.load_categories(self._labels(), self.ID)

    def tearDown(self):
        """Reset the job used by the test."""
        self.tc.reset(self.ID)

    def testSetUp(self):
        # The job loaded in setUp exists; an id that was never loaded does not.
        self.assertTrue(self.tc.exists(self.ID))
        self.assertFalse(self.tc.exists(self.ID + "0"))

    def testPing(self):
        r = self.tc.ping()
        prefix = "\"processing request at: "
        self.assertTrue(r.startswith(prefix))
        # The date that follows the prefix is not parsed or checked.

    def testReset(self):
        r = self.tc.reset(self.ID)
        self.assertTrue(r.startswith("nullified the ds object"))
        self.assertFalse(self.tc.exists(self.ID))

    def testExists(self):
        self.assertFalse(self.tc.exists(self.ID + "0"))
        self.assertTrue(self.tc.exists(self.ID))

    def testLoadCategories(self):
        # Uses its own job id so the job can be seen to appear only after
        # load_categories.
        ID = self.ID + "AnotherTestID"
        self.tc.reset(ID)
        self.assertFalse(self.tc.exists(ID))
        self.tc.load_categories(self._labels(), ID)
        self.assertTrue(self.tc.exists(ID))
        r = self.tc.get_dawid_skene(ID)
        for k, v in r['categories'].items():
            self.assertIn(k, self.labels)
            self.assertEqual(v['name'], k)
            for n, c in v['misclassification_cost'].items():
                print("%s %s %s %s" % (n, c, k, v))
                if n == k:
                    self.assertEqual(0., c)
                else:
                    self.assertEqual(self.miss_class_cost, c)
        self.tc.reset(ID)

    def testLoadCosts(self):
        self.tc.load_costs([(u"dog", "cat", 0.511)], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        self.assertEqual(
            0.511, r['categories'][u'dog']['misclassification_cost']['cat'])

    def testAssignLabel(self):
        job = self.assigned_labels[0]
        self.tc.load_worker_assigned_label(job[0], job[1], job[2], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        resp = r['workers'][job[0]]['labels'][0]
        print(job)
        # Compared as sorted value lists: the response is a dict, the input a
        # tuple.
        self.assertEqual(sorted(job), sorted(resp.values()))

    def testAssignLabels(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['workers']
        for job in self.assigned_labels:
            resp = r[job[0]]['labels'][0]
            self.assertEqual(sorted(job), sorted(resp.values()))
            # Disabled in the original:
            # self.assertEqual(job, r[job['workerName']]['labels'][0])

    def testGoldLabel(self):
        item = (self.objects[0], u"dog")
        self.tc.load_gold_label(item[0], item[1], self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        self.assertEqual(item[1], r[item[0]]["correctCategory"])
        self.assertTrue(r[item[0]]["isGold"])

    def testGoldLabels(self):
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        for item in gold_labels:
            self.assertTrue(r[item[0]]["isGold"])
            self.assertEqual(item[1], r[item[0]]["correctCategory"])

    def testMajorityVote(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in range(10):
            r = self.tc.majority_vote(self.objects[0], self.ID)
            print(r)
            # Disabled in the original:
            # self.assertEqual(self.objects[0].split("_")[1], r)
        # NOTE(review): fails unconditionally -- presumably a placeholder
        # until the expected response is pinned down; confirm.
        self.fail()

    def testMajorityVotes(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in range(10):
            r = self.tc.majority_votes(self.ID)
            print(r)
        # NOTE(review): fails unconditionally -- presumably a placeholder.
        self.fail()

    def testComputeBlocking(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r1 = self.tc.compute_blocking(1, self.ID)
        r3 = self.tc.compute_blocking(3, self.ID)
        print("%s %s" % (r1, r3))
        # NOTE(review): fails unconditionally -- presumably a placeholder.
        self.fail()

    def testPrintWorkerSummary(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(10, self.ID)
        r = self.tc.print_worker_summary(True, self.ID)
        # Ten workers with one annotation each are expected in the summary.
        self.assertEqual(10, r.count("Confusion Matrix:"))
        self.assertEqual(10, r.count("Number of Annotations: 1"))
        for assigned in self.assigned_labels:
            self.assertEqual(1, r.count("Worker: " + assigned[0]))

    def testPrintObjectsProbs(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.print_objects_probs([], self.ID)
        print(r)
        # NOTE(review): fails unconditionally -- presumably a placeholder.
        self.fail()

    def testObjectProbs(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.object_probs(self.objects[0], self.ID)
        from math import fsum
        # The returned values must sum to 1 (checked to 10 places).
        self.assertAlmostEqual(1., fsum(r.values()), 10)

    def testPrintPriors(self):
        r = self.tc.print_priors(self.ID)
        for line in r.splitlines():
            pass
            # self.assertIn(str(self.prior), line)
            # priors are not supported, so the value is not checked

    def testClassPriors(self):
        r = self.tc.class_priors(self.ID)
        r = json.loads(r)
        for _, v in r.items():
            pass
            # self.assertEqual(self.prior, v)
            # priors are not supported, so the value is not checked

    def testGetDawidSkene(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        # NOTE(review): gold labels are dicts here but (object, label) tuples
        # in testGoldLabels -- confirm load_gold_labels accepts both shapes.
        gold_labels = [{
            "objectName": self.objects[0],
            "correctCategory": "dog"
        }, {
            "objectName": self.objects[-1],
            "correctCategory": "pig"
        }]
        self.tc.load_gold_labels(gold_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        for k in ["workers", "objects", "id", "fixedPriors", "categories"]:
            self.assertIn(k, r)

        for assigned in self.assigned_labels:
            self.assertIn(assigned[0], r["workers"])
def run_troia_on_data(cost_matrix, gold_samples, workers_labels):
    """Run one Troia job from upload to results and print the outcome.

    The job TUTORIAL_JOB_ID is reset, the categories, gold samples and
    worker-assigned labels are uploaded, ITERATIONS iterations are computed
    with ``compute_blocking``, and the per-object label probabilities plus
    the worker summary are printed to stdout.

    :param cost_matrix: forwarded to ``load_categories``; contains all
        labels / categories.
    :param gold_samples: forwarded to ``load_gold_labels``; iterable of
        ``(object_id, label)`` pairs for objects whose label is known.
    :param workers_labels: forwarded to ``load_worker_assigned_labels``;
        iterable of ``(worker, object_id, label)`` triples.
    :return: None.
    """
    troia_client = TroiaClient(TROIA_SERVER_URL, TIMEOUT)
    job_id = TUTORIAL_JOB_ID

    # Start from a clean job so old data cannot interfere.
    troia_client.reset(job_id)

    troia_client.load_categories(cost_matrix, job_id)

    # Equivalent one-by-one form:
    #     for object_id, label in gold_samples:
    #         troia_client.load_gold_label(object_id, label, job_id)
    troia_client.load_gold_labels(gold_samples, job_id)

    # Equivalent one-by-one form:
    #     for worker, object_id, label in workers_labels:
    #         troia_client.load_worker_assigned_label(
    #             worker, object_id, label, job_id)
    troia_client.load_worker_assigned_labels(workers_labels, job_id)

    # Run the actual calculations.
    troia_client.compute_blocking(ITERATIONS, job_id)

    # Collect results and stats; pprint.pprint(results) shows all of them.
    results = troia_client.get_dawid_skene(job_id)

    # Label distribution of every object.
    object_label_probabilities = [
        (object_id, params['categoryProbability'])
        for object_id, params in results['objects'].items()]

    # print is used in its single-argument call form: same output on
    # Python 2, and valid syntax on Python 3.
    print("Object label probabilities:")
    pprint.pprint(sorted(object_label_probabilities))

    workers_summary = str(troia_client.print_worker_summary(True, job_id))
    print("")
    print("Workers summary:")
    print(workers_summary)
def run_troia_on_data(cost_matrix, gold_samples, workers_labels):
    """Upload data to a Troia job, run it, and print what it computed.

    Steps, all on the job TUTORIAL_JOB_ID: reset, load the categories, load
    the gold samples, load the worker-assigned labels, compute ITERATIONS
    iterations with ``compute_blocking``, then print the per-object label
    probabilities and the worker summary to stdout.

    :param cost_matrix: passed to ``load_categories``; contains all labels /
        categories.
    :param gold_samples: passed to ``load_gold_labels``; iterable of
        ``(object_id, label)`` pairs with the known correct label.
    :param workers_labels: passed to ``load_worker_assigned_labels``;
        iterable of ``(worker, object_id, label)`` triples.
    :return: None.
    """
    # Client instance used for every request below.
    troia_client = TroiaClient(TROIA_SERVER_URL, TIMEOUT)

    ID = TUTORIAL_JOB_ID

    # Make sure no old data interferes with this run.
    troia_client.reset(ID)

    # Send the cost matrix that contains all labels / categories.
    troia_client.load_categories(cost_matrix, ID)

    # Send the samples whose correct label is known.  Same thing one by one:
    #     for object_id, label in gold_samples:
    #         troia_client.load_gold_label(object_id, label, ID)
    troia_client.load_gold_labels(gold_samples, ID)

    # Send the labels workers assigned to objects.  Same thing one by one:
    #     for worker, object_id, label in workers_labels:
    #         troia_client.load_worker_assigned_label(
    #             worker, object_id, label, ID)
    troia_client.load_worker_assigned_labels(workers_labels, ID)

    # Start the actual calculations.
    troia_client.compute_blocking(ITERATIONS, ID)

    # Collect results and stats (pprint.pprint(results) dumps everything).
    results = troia_client.get_dawid_skene(ID)

    # Data related to objects: the label distribution of each one.
    objects_data = results['objects'].items()
    object_label_probabilities = [(object_id, params['categoryProbability'])
                                  for object_id, params in objects_data]

    # Single-argument print calls produce the same output on Python 2 and
    # are also valid on Python 3, unlike the print statement.
    print("Object label probabilities:")
    pprint.pprint(sorted(object_label_probabilities))

    # Data related to workers.
    workers_summary = str(troia_client.print_worker_summary(True, ID))
    print("")
    print("Workers summary:")
    print(workers_summary)
 def setUp(self):
     """Prepare the fixture: a client plus job "42" with the test categories."""
     self.tc = TroiaClient(self.base_url)
     self.ID = "42"
     categories = self._labels()
     self.tc.load_categories(categories, self.ID)
class TestClient(unittest.TestCase):
    """Integration tests for TroiaClient, run against the server at base_url.

    Every test works on job "42": setUp loads the test categories into it
    and tearDown resets it.  Requests go to a real server; there are no
    mocks in this class.
    """

    # Test methods are described with "#" comments, not docstrings, because
    # unittest shows the first docstring line instead of the method name in
    # verbose output.

    base_url = "http://localhost:8080/GetAnotherLabel/rest/"
    labels = [u"dog", "cat", "pig"]
    objects = [u"o_dog_0", u"o_dog_1", "o_cat_0", "o_pig_0", "o_pig_1"]
    prior = .314
    miss_class_cost = .141

    def _misclass_cost(self, label):
        """Return {label: cost}: 0. for the given label, miss_class_cost otherwise."""
        costs = {other: self.miss_class_cost for other in self.labels}
        costs[label] = 0.
        return costs

    def _labels(self):
        """Return (label, cost row) pairs in the form given to load_categories."""
        return [(label, self._misclass_cost(label)) for label in self.labels]

    # Ten votes on the first object: workers 0-8 say "dog", worker9 says
    # "pig".  The object is fed through the outermost iterable since the
    # body of a class-level comprehension cannot see class names on Python 3.
    assigned_labels = [("worker" + str(i), obj, "dog")
                       for obj in objects[:1] for i in range(9)]
    assigned_labels.append(("worker9", objects[0], "pig"))

    def setUp(self):
        """Create the client and load the test categories into job "42"."""
        self.tc = TroiaClient(self.base_url)
        self.ID = "42"
        self.tc.load_categories(self._labels(), self.ID)

    def tearDown(self):
        """Reset the job used by the test."""
        self.tc.reset(self.ID)

    def testSetUp(self):
        # Only the job loaded in setUp is reported as existing.
        self.assertTrue(self.tc.exists(self.ID))
        self.assertFalse(self.tc.exists(self.ID + "0"))

    def testPing(self):
        r = self.tc.ping()
        prefix = "\"processing request at: "
        self.assertTrue(r.startswith(prefix))
        # The date after the prefix is left unchecked.

    def testReset(self):
        r = self.tc.reset(self.ID)
        self.assertTrue(r.startswith("nullified the ds object"))
        self.assertFalse(self.tc.exists(self.ID))

    def testExists(self):
        self.assertFalse(self.tc.exists(self.ID + "0"))
        self.assertTrue(self.tc.exists(self.ID))

    def testLoadCategories(self):
        # A separate job id shows that the job exists only once
        # load_categories has been called.
        ID = self.ID + "AnotherTestID"
        self.tc.reset(ID)
        self.assertFalse(self.tc.exists(ID))
        self.tc.load_categories(self._labels(), ID)
        self.assertTrue(self.tc.exists(ID))
        r = self.tc.get_dawid_skene(ID)
        for k, v in r['categories'].items():
            self.assertIn(k, self.labels)
            self.assertEqual(v['name'], k)
            for n, c in v['misclassification_cost'].items():
                print("%s %s %s %s" % (n, c, k, v))
                if n == k:
                    self.assertEqual(0., c)
                else:
                    self.assertEqual(self.miss_class_cost, c)
        self.tc.reset(ID)

    def testLoadCosts(self):
        self.tc.load_costs(
            [(u"dog", "cat", 0.511)], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        self.assertEqual(0.511,
                r['categories'][u'dog']['misclassification_cost']['cat'])

    def testAssignLabel(self):
        job = self.assigned_labels[0]
        self.tc.load_worker_assigned_label(job[0], job[1], job[2], self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        resp = r['workers'][job[0]]['labels'][0]
        print(job)
        # The response is a dict and the input a tuple, so both sides are
        # compared as sorted lists of values.
        self.assertEqual(sorted(job), sorted(resp.values()))

    def testAssignLabels(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['workers']
        for job in self.assigned_labels:
            resp = r[job[0]]['labels'][0]
            self.assertEqual(sorted(job), sorted(resp.values()))
            # Disabled in the original:
            # self.assertEqual(job, r[job['workerName']]['labels'][0])

    def testGoldLabel(self):
        item = (self.objects[0], u"dog")
        self.tc.load_gold_label(item[0], item[1], self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        self.assertEqual(item[1], r[item[0]]["correctCategory"])
        self.assertTrue(r[item[0]]["isGold"])

    def testGoldLabels(self):
        gold_labels = [(self.objects[0], "dog"), (self.objects[-1], "pig")]
        self.tc.load_gold_labels(gold_labels, self.ID)
        r = self.tc.get_dawid_skene(self.ID)['objects']
        for item in gold_labels:
            self.assertTrue(r[item[0]]["isGold"])
            self.assertEqual(item[1], r[item[0]]["correctCategory"])

    def testMajorityVote(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in range(10):
            r = self.tc.majority_vote(self.objects[0], self.ID)
            print(r)
            # Disabled in the original:
            # self.assertEqual(self.objects[0].split("_")[1], r)
        # NOTE(review): unconditional failure -- looks like a placeholder
        # until the expected response is known; confirm.
        self.fail()

    def testMajorityVotes(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        for _ in range(10):
            r = self.tc.majority_votes(self.ID)
            print(r)
        # NOTE(review): unconditional failure -- looks like a placeholder.
        self.fail()

    def testComputeBlocking(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r1 = self.tc.compute_blocking(1, self.ID)
        r3 = self.tc.compute_blocking(3, self.ID)
        print("%s %s" % (r1, r3))
        # NOTE(review): unconditional failure -- looks like a placeholder.
        self.fail()

    def testPrintWorkerSummary(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(10, self.ID)
        r = self.tc.print_worker_summary(True, self.ID)
        # The summary should list ten workers with one annotation each.
        self.assertEqual(10, r.count("Confusion Matrix:"))
        self.assertEqual(10, r.count("Number of Annotations: 1"))
        for assigned in self.assigned_labels:
            self.assertEqual(1, r.count("Worker: " + assigned[0]))

    def testPrintObjectsProbs(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.print_objects_probs([], self.ID)
        print(r)
        # NOTE(review): unconditional failure -- looks like a placeholder.
        self.fail()

    def testObjectProbs(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        r = self.tc.object_probs(self.objects[0], self.ID)
        from math import fsum
        # Values are expected to sum to 1, compared to 10 places.
        self.assertAlmostEqual(1., fsum(r.values()), 10)

    def testPrintPriors(self):
        r = self.tc.print_priors(self.ID)
        for line in r.splitlines():
            pass
            # self.assertIn(str(self.prior), line)
            # priors are not supported, so nothing is asserted per line

    def testClassPriors(self):
        r = self.tc.class_priors(self.ID)
        r = json.loads(r)
        for _, v in r.items():
            pass
            # self.assertEqual(self.prior, v)
            # priors are not supported, so nothing is asserted per value

    def testGetDawidSkene(self):
        self.tc.load_worker_assigned_labels(self.assigned_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        # NOTE(review): dicts are passed here while testGoldLabels passes
        # (object, label) tuples -- confirm load_gold_labels takes both.
        gold_labels = [{"objectName": self.objects[0], "correctCategory": "dog"},
                    {"objectName": self.objects[-1], "correctCategory": "pig"}]
        self.tc.load_gold_labels(gold_labels, self.ID)
        self.tc.compute_blocking(3, self.ID)
        r = self.tc.get_dawid_skene(self.ID)
        for k in ["workers", "objects", "id", "fixedPriors", "categories"]:
            self.assertIn(k, r)

        for assigned in self.assigned_labels:
            self.assertIn(assigned[0], r["workers"])