Beispiel #1
0
    def test_correct_despite_outliers(self):
        """Run consensus on ten groupings, two of which are degenerate.

        The first outlier puts every row id in its own group, the second puts
        all row ids in a single group; the remaining eight entries are
        ``self.grouping``.  The result is checked with ``_assert_correct``.
        """
        sample_count = 10

        # Outlier 1: one singleton group per row id.
        singletons = [[row_id] for row_id in self.row_ids]
        # Outlier 2: a single group holding every row id.
        everything = [list(self.row_ids)]

        groupings = [singletons, everything]
        groupings.extend([self.grouping] * (sample_count - 2))

        consensus = find_consensus_grouping(groupings)
        self._assert_correct(consensus)
Beispiel #2
0
    def test_simple(self):
        """Check structural invariants of a consensus grouping.

        The result must be a list of ``loom.group.Row`` objects, contain each
        id of ``self.row_ids`` exactly once, and use group ids that form the
        contiguous range ``0 .. number_of_groups - 1``.
        """
        groupings = self.sample_groupings()
        grouping = find_consensus_grouping(groupings, debug=True)
        assert isinstance(grouping, list)
        for row in grouping:
            assert isinstance(row, loom.group.Row), row

        row_ids = set(row.row_id for row in grouping)
        assert len(row_ids) == len(grouping), 'grouping had duplicate rows'
        assert_set_equal(set(self.row_ids), row_ids)

        group_ids = sorted(set(row.group_id for row in grouping))
        # Materialize the range: on Python 3 a list never compares equal to a
        # range object, so the bare range() would make this check always fail.
        assert_equal(
            group_ids,
            list(range(len(group_ids))),
            'group ids were not a contiguous range of integers')
Beispiel #3
0
    def test_sorting(self):
        """Check the ordering of rows in a consensus grouping.

        Repeated on ten freshly sampled inputs.  Rows must already be sorted
        by (group id ascending, confidence descending, row id ascending), and
        group sizes must be non-increasing as the group id grows.
        """
        # range() rather than the Python-2-only xrange(); the loop index is
        # not used, only the number of repetitions matters.
        for _ in range(10):
            groupings = self.sample_groupings()
            grouping = find_consensus_grouping(groupings, debug=True)
            assert_equal(
                grouping,
                sorted(
                    grouping,
                    key=lambda x: (x.group_id, -x.confidence, x.row_id)))

            # Group sizes listed in order of ascending group id.
            group_ids = sorted(set(row.group_id for row in grouping))
            counts = [
                sum(1 for row in grouping if row.group_id == gid)
                for gid in group_ids
            ]
            assert_equal(counts, sorted(counts, reverse=True))
Beispiel #4
0
    def test_correct_on_noisy_data(self):
        """Run consensus on ten groupings that each misplace some objects.

        Sample ``g`` moves every tenth row id (starting at offset ``g``) from
        its group in ``self.grouping`` into the next group, so each row id is
        misplaced in exactly one sample.  The consensus result is checked
        with ``_assert_correct``.
        """
        SAMPLE_COUNT = 10
        GROUP_COUNT = len(self.grouping)

        # Map each object to the index of its group in self.grouping.
        object_index = {
            o: g
            for g, group in enumerate(self.grouping)
            for o in group
        }

        # each object is in the wrong place in one grouping
        groupings = []
        for g in range(SAMPLE_COUNT):
            # Work on per-sample copies of every group.  Mutating
            # self.grouping directly would corrupt the fixture and, because
            # the inner lists would be shared, make every sample end up with
            # the same accumulated moves instead of its own.
            groups = [list(group) for group in self.grouping]
            for o in self.row_ids[g::SAMPLE_COUNT]:
                t = object_index[o]
                f = (t + 1) % GROUP_COUNT
                groups[t].remove(o)
                groups[f].append(o)
            # Drop groups emptied by the moves; a real list is built here
            # because filter() is lazy on Python 3.
            groups = [group for group in groups if group]
            groupings.append(groups)

        grouping = find_consensus_grouping(groupings)
        self._assert_correct(grouping)
Beispiel #5
0
 def test_correct_on_perfect_data(self):
     """Run consensus on 1 to 10 identical copies of ``self.grouping``.

     Each result is checked with ``_assert_correct`` using confidence 1.0.
     """
     sample_count = 1
     while sample_count <= 10:
         # Every sample is the very same grouping object.
         identical_samples = [self.grouping] * sample_count
         consensus = find_consensus_grouping(identical_samples)
         self._assert_correct(consensus, confidence=1.0)
         sample_count += 1