예제 #1
0
 def test_get_negative_adjudication_relevancy_no_cache_no_results(self):
     """
     A freshly constructed session (nothing refined, nothing adjudicated)
     must yield an empty list from
     ``get_negative_adjudication_relevancy``.
     """
     session = IqrSession()
     negative_relevancy = session.get_negative_adjudication_relevancy()
     assert negative_relevancy == []
예제 #2
0
 def test_get_unadjudicated_relevancy_no_cache_no_results(self):
     """
     A freshly constructed session (no refinement, hence no results state)
     must yield an empty list from ``get_unadjudicated_relevancy``.
     """
     session = IqrSession()
     unadjudicated_relevancy = session.get_unadjudicated_relevancy()
     assert unadjudicated_relevancy == []
예제 #3
0
    def test_adjudicate_remove_pos_neg(self):
        """
        Check that the ``un_positives`` / ``un_negatives`` parameters of
        ``adjudicate`` remove elements from the positive and negative
        adjudication sets.
        """
        session = IqrSession()

        # Five descriptors with UIDs 0..4, each holding the vector [uid].
        p0, p1, p2, n3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # Starting point: three positives and two negatives.
        session.positive_descriptors = {p0, p1, p2}
        session.negative_descriptors = {n3, n4}

        # Remove a single positive; negatives must be untouched.
        session.adjudicate(un_positives=[p1])
        assert session.positive_descriptors == {p0, p2}
        assert session.negative_descriptors == {n3, n4}

        # Remove a single negative; positives must be untouched.
        session.adjudicate(un_negatives=[n3])
        assert session.positive_descriptors == {p0, p2}
        assert session.negative_descriptors == {n4}

        # Remove everything that is left in one call.
        session.adjudicate(un_positives=[p0, p2], un_negatives=[n4])
        assert session.positive_descriptors == set()
        assert session.negative_descriptors == set()
예제 #4
0
    def test_adjudication_switch(self):
        """
        Adjudicate already-labelled descriptors with the opposite label and
        check that each one moves from its old set to the new one (former
        positives become negatives and vice versa).
        """
        session = IqrSession()

        # Five descriptors with UIDs 0..4, each holding the vector [uid].
        p0, p1, p2, n3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # Starting point: three positives and two negatives.
        session.positive_descriptors = {p0, p1, p2}
        session.negative_descriptors = {n3, n4}

        # Flip one negative to positive.
        session.adjudicate(new_positives=[n3])
        assert session.positive_descriptors == {p0, p1, p2, n3}
        assert session.negative_descriptors == {n4}

        # Flip one positive to negative.
        session.adjudicate(new_negatives=[p1])
        assert session.positive_descriptors == {p0, p2, n3}
        assert session.negative_descriptors == {n4, p1}

        # Flip all remaining original labels in a single call.
        session.adjudicate(new_positives=[n4], new_negatives=[p0, p2])
        assert session.positive_descriptors == {n3, n4}
        assert session.negative_descriptors == {p0, p1, p2}
예제 #5
0
 def test_ordered_results_no_results_no_cache(self):
     """
     Calling ``ordered_results`` on a session that has never been refined
     must give back an empty list.
     """
     session = IqrSession()
     ordered = session.ordered_results()
     assert ordered == []
예제 #6
0
 def test_refine_no_rel_index(self):
     """
     ``refine`` on a session without a relevancy index instance must raise
     a ``RuntimeError`` whose message matches "No relevancy index yet".
     """
     session = IqrSession()
     expect_failure = pytest.raises(
         RuntimeError, match="No relevancy index yet"
     )
     with expect_failure:
         session.refine()
예제 #7
0
    def test_get_unadjudicated_relevancy_has_cache(self):
        """
        When the non-adjudicated cache is populated,
        ``get_unadjudicated_relevancy`` must return a list equal to the
        cache but not the cache object itself.
        """
        session = IqrSession()

        # Stand-in cache content; the values themselves are irrelevant.
        session._ordered_non_adj = ['simulation', 'cache']
        returned = session.get_unadjudicated_relevancy()
        assert returned == ['simulation', 'cache']
        assert returned is not session._ordered_non_adj
예제 #8
0
 def test_ordered_results_has_cache(self):
     """
     When the positive-ordering cache (``_ordered_pos``) is populated,
     ``get_positive_adjudication_relevancy`` must return a list equal to
     the cache but not the cache object itself.

     NOTE(review): the test name mentions ``ordered_results`` while the
     body exercises ``get_positive_adjudication_relevancy`` -- confirm
     which one is intended.
     """
     session = IqrSession()
     # Stand-in cache content; the values themselves are irrelevant.
     session._ordered_pos = ['simulated', 'cache']
     returned = session.get_positive_adjudication_relevancy()
     assert returned == session._ordered_pos
     assert returned is not session._ordered_pos
예제 #9
0
    def test_reset_result_cache_invalidation(self):
        """
        ``reset`` must set all three result-view caches back to ``None``.
        """
        session = IqrSession()
        # Put a non-None marker in every cache so a reset is observable.
        session._ordered_non_adj = True
        session._ordered_neg = True
        session._ordered_pos = True

        session.reset()

        assert session._ordered_pos is None
        assert session._ordered_neg is None
        assert session._ordered_non_adj is None
예제 #10
0
    def test_get_session_info(self):
        """
        Register a session with a non-trivial adjudication state and check
        every field reported by a GET on ``/session`` for it.
        """
        ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
        session = IqrSession(ranker, session_uid='abc')

        # UID -> role of the descriptor in this test:
        #   a: external positive      b: external negative
        #   c, d, e: positives        f, g, j: negatives
        #   h, i: currently neutral, but part of the last-refine sets below
        (ext_pos, ext_neg, pos_c, pos_d, pos_e,
         neg_f, neg_g, neutral_h, neutral_i, neg_j) = [
            DescriptorMemoryElement('test', uid) for uid in 'abcdefghij'
        ]

        # Adjudications as they currently stand.
        session.external_positive_descriptors = {ext_pos}
        session.positive_descriptors = {pos_c, pos_d, pos_e}
        session.external_negative_descriptors = {ext_neg}
        session.negative_descriptors = {neg_f, neg_g, neg_j}
        # Adjudications recorded for the last refinement; the two neutral
        # descriptors simulate elements that were adjudicated back then.
        session.rank_contrib_pos = {pos_d, neutral_h}
        session.rank_contrib_pos_ext = {ext_pos}
        session.rank_contrib_neg = {neg_f, neg_j, neutral_i}
        session.rank_contrib_neg_ext = set()
        # Working set holds every non-external descriptor (8 elements).
        session.working_set.add_many_descriptors(
            [pos_c, pos_d, pos_e, neg_f, neg_g, neutral_h, neutral_i, neg_j]
        )

        self.app.controller.add_session(session)

        with self.app.test_client() as client:
            #: :type: flask.wrappers.Response
            response = client.get('/session?sid=abc')
            self.assertStatusCode(response, 200)
            info = response.json
            assert info['sid'] == 'abc'
            # Current adjudication sets.
            assert set(info['uuids_pos_ext']) == {'a'}
            assert set(info['uuids_pos']) == {'c', 'd', 'e'}
            assert set(info['uuids_neg_ext']) == {'b'}
            assert set(info['uuids_neg']) == {'f', 'g', 'j'}
            # Sets that contributed to the last refinement.
            assert set(info['uuids_pos_in_model']) == {'d', 'h'}
            assert set(info['uuids_pos_ext_in_model']) == {'a'}
            assert set(info['uuids_neg_in_model']) == {'f', 'j', 'i'}
            assert set(info['uuids_neg_ext_in_model']) == set()
            # Number of descriptors added to the working set above.
            assert info['wi_count'] == 8
예제 #11
0
    def test_adjudicate_unadj_noeffect(self):
        """
        Neither an argument-less ``adjudicate`` call nor un-adjudicating a
        descriptor that carries no label may change the adjudication sets.
        """
        session = IqrSession()

        # Five descriptors with UIDs 0..4, each holding the vector [uid].
        p0, p1, p2, n3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # Starting point: three positives and two negatives.
        session.positive_descriptors = {p0, p1, p2}
        session.negative_descriptors = {n3, n4}

        # Call without any arguments.
        session.adjudicate()
        assert session.positive_descriptors == {p0, p1, p2}
        assert session.negative_descriptors == {n3, n4}

        # Un-adjudicate an element that is in neither set.
        unlabelled = DescriptorMemoryElement('', 5).set_vector([5])
        session.adjudicate(un_positives=[unlabelled],
                           un_negatives=[unlabelled])
        assert session.positive_descriptors == {p0, p1, p2}
        assert session.negative_descriptors == {n3, n4}
예제 #12
0
    def test_adjudicate_add_duplicates(self):
        """
        Re-submitting descriptors that already carry the given label must
        leave the positive/negative sets unchanged (set semantics).
        """
        session = IqrSession()

        # Descriptors with UIDs 0..4, each holding the vector [uid].
        p0, n1, p2, p3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # First, label only a subset.
        session.adjudicate(new_positives=[p0], new_negatives=[n1])
        assert session.positive_descriptors == {p0}
        assert session.negative_descriptors == {n1}

        # Now submit everything; the already-labelled ones must not be
        # duplicated.
        all_positives = [p0, p2, p3]
        all_negatives = [n1, n4]
        session.adjudicate(new_positives=all_positives,
                           new_negatives=all_negatives)
        assert session.positive_descriptors == {p0, p2, p3}
        assert session.negative_descriptors == {n1, n4}

        # Submitting the exact same labels again must be a no-op.
        session.adjudicate(new_positives=all_positives,
                           new_negatives=all_negatives)
        assert session.positive_descriptors == {p0, p2, p3}
        assert session.negative_descriptors == {n1, n4}
예제 #13
0
    def test_adjudicate_combined_remove_unadj(self):
        """
        Mix adding, label switching and un-adjudication within single
        ``adjudicate`` calls and check the resulting sets.
        """
        session = IqrSession()

        # Five descriptors with UIDs 0..4, each holding the vector [uid].
        p0, p1, p2, n3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # Starting point: three positives and two negatives.
        session.positive_descriptors = {p0, p1, p2}
        session.negative_descriptors = {n3, n4}

        # New positive p5, p1 becomes negative, p2 loses its label.
        p5 = DescriptorMemoryElement('', 5).set_vector([5])
        session.adjudicate(
            new_positives=[p5],
            new_negatives=[p1],
            un_positives=[p2],
        )
        assert session.positive_descriptors == {p0, p5}
        assert session.negative_descriptors == {n3, n4, p1}

        # New negative n6, n4 becomes positive, n3 loses its label.
        n6 = DescriptorMemoryElement('', 6).set_vector([6])
        session.adjudicate(
            new_positives=[n4],
            new_negatives=[n6],
            un_negatives=[n3],
        )
        assert session.positive_descriptors == {p0, p5, n4}
        assert session.negative_descriptors == {p1, n6}
예제 #14
0
    def test_refine_no_pos(self):
        """
        With a relevancy index present but no positive adjudications,
        ``refine`` must raise a ``RuntimeError`` whose message matches
        "Did not find at least one positive adjudication".
        """
        session = IqrSession()
        # Stand-in relevancy index, spec'd to the real interface.
        session.rel_index = mock.MagicMock(spec=RelevancyIndex)
        # Report a non-zero length so the index appears to have content.
        session.rel_index.__len__.return_value = 1

        expected_message = 'Did not find at least one positive adjudication'
        with pytest.raises(RuntimeError, match=expected_message):
            session.refine()
예제 #15
0
 def setup_method(cls):
     """
     Build the ``IqrSession`` under test around a mock spec'd to
     ``RankRelevancyWithFeedback`` and store it as ``iqrs``.
     """
     mock_ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
     cls.iqrs = IqrSession(mock_ranker)
예제 #16
0
def train_classifier_iqr(config, iqr_state_fp):
    """
    Train a supervised classifier on the adjudications stored in a saved
    IQR session state file.

    :param config: Configuration mapping; its ``'classifier'`` entry is
        passed to ``from_config_dict`` to build the classifier.
    :param iqr_state_fp: Path of the file containing the serialized IQR
        session state. It is read in binary mode and stripped of
        surrounding whitespace bytes.
    """
    classifier_config = config['classifier']
    classifier_impls = SupervisedClassifier.get_impls()
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_config_dict(classifier_config, classifier_impls)

    # Restore the saved state into a fresh, empty session.
    with open(iqr_state_fp, 'rb') as state_file:
        state_bytes = state_file.read().strip()
    descr_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    session = IqrSession()
    session.set_state_bytes(state_bytes, descr_factory)

    # Each training class is the union of the session's adjudicated and
    # external descriptors for that label.
    class_examples = {
        'positive': (session.positive_descriptors
                     | session.external_positive_descriptors),
        'negative': (session.negative_descriptors
                     | session.external_negative_descriptors),
    }
    classifier.train(class_examples=class_examples)
예제 #17
0
    def test_ordered_results_has_results_post_reset(self):
        """
        After ``reset``, ``ordered_results`` must return an empty list even
        though results were present (and requested) before the reset.
        """
        session = IqrSession()

        # Fake a results map as if a refinement had happened.
        d0, d1, d2, d3 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
        )
        session.results = {d0: 0.0, d1: 0.8, d2: 0.2, d3: 0.4}

        # With results present, the first call must not return None.
        assert session.ordered_results() is not None

        session.reset()

        # Nothing may be left after the reset.
        post_reset = session.ordered_results()
        assert post_reset == []
예제 #18
0
def train_classifier_iqr(config, iqr_state_fp):
    """
    Train a supervised classifier on the adjudications stored in a saved
    IQR session state file.

    :param config: Configuration mapping; its ``'classifier'`` entry is
        passed to ``from_plugin_config`` to build the classifier.
    :param iqr_state_fp: Path of the file containing the serialized IQR
        session state. It is read in binary mode and stripped of
        surrounding whitespace bytes.
    """
    classifier_config = config['classifier']
    supervised_impls = get_classifier_impls(
        sub_interface=SupervisedClassifier
    )
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_plugin_config(classifier_config, supervised_impls)

    # Restore the saved state into a fresh, empty session.
    with open(iqr_state_fp, 'rb') as state_file:
        state_bytes = state_file.read().strip()
    descr_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    session = IqrSession()
    session.set_state_bytes(state_bytes, descr_factory)

    # Each training class is the union of the session's adjudicated and
    # external descriptors for that label.
    class_examples = {
        'positive': (session.positive_descriptors
                     | session.external_positive_descriptors),
        'negative': (session.negative_descriptors
                     | session.external_negative_descriptors),
    }
    classifier.train(class_examples=class_examples)
예제 #19
0
    def test_get_session_info_invalid_session_id(self):
        """
        Querying the ``session`` getter with an ID that maps to no
        registered session must fail the way
        ``_test_getter_sid_not_found`` expects, both with no sessions at
        all and with an unrelated session registered.
        """
        # Case 1: the controller holds no sessions yet.
        self._test_getter_sid_not_found('session')

        # Case 2: a session exists, but under UID '1' rather than the '0'
        # that is queried for.
        other_session = IqrSession(session_uid='1')
        self.app.controller.add_session(other_session)
        self._test_getter_sid_not_found('session')
예제 #20
0
    def test_ordered_results_has_results_no_cache(self):
        """
        With results present and an empty cache, ``ordered_results`` must
        return the (descriptor, score) pairs by descending score, sort only
        on the first call, and hand out a distinct list object each time.
        """
        session = IqrSession()

        # Fake a results map as if a refinement had happened.
        d0, d1, d2, d3 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
        )
        session.results = {d0: 0.0, d1: 0.8, d2: 0.2, d3: 0.4}

        # No cached ordering may exist before the first call.
        assert session._ordered_results is None

        sorted_target = 'smqtk.iqr.iqr_session.sorted'

        # First call: the real ``sorted`` is used (through the mock) exactly
        # once.
        with mock.patch(sorted_target, side_effect=sorted) as m_sorted:
            first_call = session.ordered_results()
            m_sorted.assert_called_once()

        expected = [(d1, 0.8), (d3, 0.4), (d2, 0.2), (d0, 0.0)]
        assert first_call == expected

        # Second call: served from the cache, so ``sorted`` is untouched.
        with mock.patch(sorted_target) as m_sorted:
            second_call = session.ordered_results()
            m_sorted.assert_not_called()

        assert second_call == expected
        # Equal content, but never the same list instance.
        assert first_call is not second_call
예제 #21
0
    def test_add_iqr_state_classifier_simple(self):
        """
        POST a minimal serialized IQR session to ``/iqr_classifier`` and
        check the response, the training call and the resulting labels.
        """
        # Minimal session: one positive and one negative adjudication.
        session = IqrSession(session_uid=str("0"))
        iqr_p1 = DescriptorMemoryElement('test', 0).set_vector([0])
        iqr_n1 = DescriptorMemoryElement('test', 1).set_vector([1])
        session.adjudicate(new_positives=[iqr_p1], new_negatives=[iqr_n1])

        test_label = 'test-label-08976azsdv'
        form_data = {
            'bytes_b64': base64.b64encode(session.get_state_bytes()),
            'label': test_label,
        }
        expected_message = ("Finished training IQR-session-based classifier "
                            "for label '%s'." % test_label)

        # Replace the stub classifier's training hook to observe the call.
        train_target = (STUB_CLASSIFIER_MOD_PATH +
                        ".DummySupervisedClassifier._train")
        with mock.patch(train_target) as m_cfier_train:

            with self.app.test_client() as cli:
                rv = cli.post('/iqr_classifier', data=form_data)
                self.assertStatus(rv, 201)
                self.assertResponseMessageRegex(rv, expected_message)

            expected_examples = {
                'positive': {iqr_p1},
                'negative': {iqr_n1},
            }
            m_cfier_train.assert_called_once_with(expected_examples)
            # The collection is expected to hold two labels afterwards: the
            # initial dummy classifier and the newly added one.
            self.assertEqual(len(self.app.classifier_collection.labels()), 2)
            self.assertIn(test_label, self.app.classifier_collection.labels())
예제 #22
0
    def test_get_session_info_invalid_session_id(self):
        """
        Querying the ``session`` getter with an ID that maps to no
        registered session must fail the way
        ``_test_getter_sid_not_found`` expects, both with no sessions at
        all and with an unrelated session registered.
        """
        # Case 1: the controller holds no sessions yet.
        self._test_getter_sid_not_found('session')

        # Case 2: a session exists, but under UID '1' rather than the '0'
        # that is queried for.
        ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
        other_session = IqrSession(ranker, session_uid='1')
        self.app.controller.add_session(other_session)
        self._test_getter_sid_not_found('session')
예제 #23
0
    def test_add_iqr_state_classifier_simple(self):
        """
        Send a small serialized IQR session to the ``/iqr_classifier``
        endpoint and verify the 201 response, the arguments the classifier
        was trained with, and the labels present afterwards.
        """
        # Session holding exactly one positive and one negative element.
        positive_elem = DescriptorMemoryElement('test', 0).set_vector([0])
        negative_elem = DescriptorMemoryElement('test', 1).set_vector([1])
        session = IqrSession(session_uid=str("0"))
        session.adjudicate(new_positives=[positive_elem],
                           new_negatives=[negative_elem])

        state_b64 = base64.b64encode(session.get_state_bytes())
        test_label = 'test-label-08976azsdv'

        patched_train = mock.patch(
            STUB_CLASSIFIER_MOD_PATH + ".DummySupervisedClassifier._train"
        )
        with patched_train as m_cfier_train:

            with self.app.test_client() as client:
                payload = {'bytes_b64': state_b64, 'label': test_label}
                response = client.post('/iqr_classifier', data=payload)
                self.assertStatus(response, 201)
                message_regex = (
                    "Finished training IQR-session-based classifier for "
                    "label '%s'." % test_label
                )
                self.assertResponseMessageRegex(response, message_regex)

            m_cfier_train.assert_called_once_with({
                'positive': {positive_elem},
                'negative': {negative_elem},
            })
            # Two labels are expected: the initial dummy classifier plus the
            # one just trained.
            self.assertEqual(len(self.app.classifier_collection.labels()), 2)
            self.assertIn(test_label, self.app.classifier_collection.labels())
예제 #24
0
    def test_get_unadjudicated_relevancy_no_cache_has_results(self):
        """
        With results present and an empty cache,
        ``get_unadjudicated_relevancy`` must return only the descriptors
        outside the contributing positive/negative sets, by descending
        score, sorting only on the first call.
        """
        session = IqrSession()

        d0, d1, d2, d3 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
        )

        # Fake the adjudications that contributed to the last refinement
        # (a valid post-refine state needs at least one positive).
        session.rank_contrib_pos = {d1}
        session.rank_contrib_neg = {d0}

        # Fake the results map of that refinement.
        session.results = {d0: 0.1, d1: 0.8, d2: 0.2, d3: 0.4}

        # No cached ordering may exist before the first call.
        assert session._ordered_non_adj is None

        sorted_target = 'smqtk.iqr.iqr_session.sorted'

        # First call: the real ``sorted`` is used (through the mock) exactly
        # once.
        with mock.patch(sorted_target, side_effect=sorted) as m_sorted:
            first_call = session.get_unadjudicated_relevancy()
            m_sorted.assert_called_once()

        expected = [(d3, 0.4), (d2, 0.2)]
        assert first_call == expected

        # Second call: served from the cache, so ``sorted`` is untouched.
        with mock.patch(sorted_target, side_effect=sorted) as m_sorted:
            second_call = session.get_unadjudicated_relevancy()
            m_sorted.assert_not_called()

        assert second_call == expected
        # Equal content, but never the same list instance.
        assert first_call is not second_call
예제 #25
0
    def test_adjudicate_cache_resetting_negative(self):
        """
        Adding a negative adjudication must clear the negative and
        non-adjudicated result-view caches and leave the positive cache
        alone.
        """
        new_negative = DescriptorMemoryElement('', 0).set_vector([0])

        session = IqrSession()
        # Put a non-None marker in every cache so a reset is observable.
        session._ordered_pos = True
        session._ordered_neg = True
        session._ordered_non_adj = True

        # A new negative adjudication affects only the negative and
        # non-adjudicated views.
        session.adjudicate(new_negatives=[new_negative])
        assert session._ordered_pos is True  # untouched
        assert session._ordered_neg is None  # cleared
        assert session._ordered_non_adj is None  # cleared
예제 #26
0
    def get_current_iqr_session(self):
        """
        Return the IQR session for the current flask session ID, creating
        and registering it (plus its work directory and example
        bookkeeping entries) on first use.

        :rtype: smqtk.IQR.iqr_session.IqrSession

        """
        controller = self._iqr_controller
        with controller:
            sid = flask.session.sid
            if controller.has_session_uuid(sid):
                return controller.get_session(sid)

            # First request for this sid: build and register the session.
            new_session = IqrSession(self._pos_seed_neighbors,
                                     self._rel_index_config, sid)
            controller.add_session(new_session, sid)

            # Per-session working directory, keyed by the session's uuid.
            session_work_dir = osp.join(self.work_dir, sid)
            self._iqr_work_dirs[new_session.uuid] = session_work_dir
            safe_create_dir(session_work_dir)

            # Empty per-session example bookkeeping.
            self._iqr_example_data[new_session.uuid] = {}
            self._iqr_example_pos_descr[new_session.uuid] = {}

            return controller.get_session(sid)
예제 #27
0
    def get_current_iqr_session(self):
        """
        Return the IQR session for the current flask session ID, creating
        and registering a new one on first use.

        :rtype: smqtk.IQR.iqr_session.IqrSession

        """
        controller = self._iqr_controller
        with controller:
            sid = flask.session.sid
            if controller.has_session_uuid(sid):
                return controller.get_session(sid)

            # First request for this sid: the session gets its own work
            # directory path below ``self.work_dir``.
            new_session = IqrSession(
                osp.join(self.work_dir, sid),
                self._descriptor_generator,
                self._nn_index,
                self._pos_seed_neighbors,
                self._rel_index_config,
                self._descr_elem_factory,
                sid,
            )
            controller.add_session(new_session, sid)

            return controller.get_session(sid)
예제 #28
0
    def test_adjudicate_new_pos_neg(self):
        """
        Iterables given as ``new_positives`` / ``new_negatives`` must be
        added to the positive and negative sets respectively.
        """
        session = IqrSession()

        # A single positive.
        p0 = DescriptorMemoryElement('', 0).set_vector([0])
        session.adjudicate(new_positives=[p0])
        assert session.positive_descriptors == {p0}
        assert session.negative_descriptors == set()

        # A single negative.
        n1 = DescriptorMemoryElement('', 1).set_vector([1])
        session.adjudicate(new_negatives=[n1])
        assert session.positive_descriptors == {p0}
        assert session.negative_descriptors == {n1}

        # Several of both kinds in one call.
        p2, p3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in (2, 3, 4)
        )
        session.adjudicate(new_positives=[p2, p3], new_negatives=[n4])
        assert session.positive_descriptors == {p0, p2, p3}
        assert session.negative_descriptors == {n1, n4}
예제 #29
0
    def createSession(self, params):
        """
        Create a session item in the sessions folder, attach its initial
        metadata, and register a matching ``IqrSession`` with the
        controller unless one with that ID is already registered.

        :param params: Request parameters; ``params['smqtkFolder']`` is the
            ID of the SMQTK folder.
        :return: The created session item.
        """
        smqtkFolder = params['smqtkFolder']
        sessionsFolder = getCreateSessionsFolder()

        # The parent of the SMQTK folder is the folder holding the images;
        # it determines which descriptor set table is used.
        smqtkFolderDoc = ModelImporter.model('folder').load(
            ObjectId(smqtkFolder), user=getCurrentUser())
        dataFolderId = str(smqtkFolderDoc['parentId'])

        # The item is created under a placeholder name and then renamed to
        # its own ID.
        session = ModelImporter.model('item').createItem(
            'placeholder_name', getCurrentUser(), sessionsFolder)
        session['name'] = str(session['_id'])
        ModelImporter.model('item').save(session)
        sessionId = str(session['_id'])

        initialMetadata = {
            'smqtk_folder_id': smqtkFolder,
            'data_folder_id': dataFolderId,
            'pos_uuids': [],
            'neg_uuids': []
        }
        ModelImporter.model('item').setMetadata(session, initialMetadata)

        # Nothing more to do when the controller already knows this ID.
        if self.controller.has_session_uuid(sessionId):
            return session

        iqrs = IqrSession(self.positive_seed_neighbors, session_uid=sessionId)

        # The session is entered as well because the classifier maps are
        # locked by session.
        with self.controller, iqrs:
            self.controller.add_session(iqrs)
            self.session_classifiers[sessionId] = None
            self.session_classifier_dirty[sessionId] = True

        return session
예제 #30
0
    def test_adjudicate_both_labels(self):
        """
        Submitting the same descriptor as both a new positive and a new
        negative in one call must leave the adjudication sets unchanged.
        """
        session = IqrSession()

        # Five descriptors with UIDs 0..4, each holding the vector [uid].
        p0, p1, p2, n3, n4 = (
            DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
        )

        # Starting point: three positives and two negatives.
        session.positive_descriptors = {p0, p1, p2}
        session.negative_descriptors = {n3, n4}

        # A new element labelled positive and negative at the same time.
        conflicted = DescriptorMemoryElement('', 5).set_vector([5])
        session.adjudicate(new_positives=[conflicted],
                           new_negatives=[conflicted])
        assert session.positive_descriptors == {p0, p1, p2}
        assert session.negative_descriptors == {n3, n4}
예제 #31
0
    def add_iqr_state_classifier(self):
        """
        Train a classifier based on the user-provided IQR state file bytes in
        a base64 encoding, matched with a descriptive label of that
        classifier's topic.

        Since all IQR session classifiers end up only having two result
        classes (positive and negative), the topic of the classifier is
        encoded in the descriptive label the user applies to the classifier.

        Below is an example call to this endpoint via the ``requests`` python
        module, showing how base64 data is sent::

            import base64
            import requests
            data_bytes = "Load some content bytes here."
            requests.get('http://localhost:5000/iqr_classifier',
                         data={'bytes_b64': base64.b64encode(data_bytes),
                               'label': 'some_label'})

        With curl on the command line::

            $ curl -X POST localhost:5000/iqr_classifier \
                -d "label=some_label" \
                --data-urlencode "bytes_b64=$(base64 -w0 /path/to/file)"

            # If this fails, you may wish to encode the file separately and
            # use the file reference syntax instead:

            $ base64 -w0 /path/to/file > /path/to/file.b64
            $ curl -X POST localhost:5000/iqr_classifier -d label=some_label \
                --data-urlencode bytes_64@/path/to/file.b64

        To lock this classifier and guard it against deletion, add
        "lock_label=true"::

            $ curl -X POST localhost:5000/iqr_classifier \
                -d "label=some_label" \
                -d "lock_label=true" \
                --data-urlencode "bytes_b64=$(base64 -w0 /path/to/file)"

        Form arguments:
            iqr_state_b64
                base64 encoding of the bytes of the IQR session state save
                file.
            label
                Descriptive label to apply to this classifier. This should not
                conflict with existing classifier labels.
            lock_label
                If 'true', disallow deletion of this label. If 'false', allow
                deletion of this label. Only has an effect if deletion is
                enabled for this service. (Default: 'false')

        Returns 201.

        """
        data_b64 = flask.request.values.get('bytes_b64', default=None)
        label = flask.request.values.get('label', default=None)
        lock_clfr_str = flask.request.values.get('lock_label',
                                                 default='false')

        if data_b64 is None or len(data_b64) == 0:
            return make_response_json("No state base64 data provided.", 400)
        elif label is None or len(label) == 0:
            return make_response_json("No descriptive label provided.", 400)
        try:
            lock_clfr = bool(flask.json.loads(lock_clfr_str))
        except JSON_DECODE_EXCEPTION:
            return make_response_json("Invalid boolean value for"
                                      " 'lock_label'. Was given: '%s'"
                                      % lock_clfr_str,
                                      400)
        try:
            # Using urlsafe version because it handles both regular and urlsafe
            # alphabets.
            data_bytes = base64.urlsafe_b64decode(data_b64.encode('utf-8'))
        except (TypeError, binascii.Error) as ex:
            return make_response_json("Invalid base64 input: %s" % str(ex)), \
                   400

        # If the given label conflicts with one already in the collection,
        # fail.
        if label in self.classifier_collection.labels():
            return make_response_json(
                "Label already exists in classifier collection.", 400)

        # Create dummy IqrSession to extract pos/neg descriptors.
        iqrs = IqrSession()
        iqrs.set_state_bytes(data_bytes, self.descriptor_factory)
        pos = iqrs.positive_descriptors | iqrs.external_positive_descriptors
        neg = iqrs.negative_descriptors | iqrs.external_negative_descriptors
        del iqrs

        # Make a classifier instance from the stored config for IQR
        # session-based classifiers.
        #: :type: SupervisedClassifier
        classifier = smqtk.utils.plugin.from_plugin_config(
            self.iqr_state_classifier_config,
            get_classifier_impls(sub_interface=SupervisedClassifier)
        )
        classifier.train(class_examples={'positive': pos, 'negative': neg})

        try:
            self.classifier_collection.add_classifier(label, classifier)

            # If we're allowing deletions, get the lock flag from the form and
            # set it for this classifier
            if self.enable_classifier_removal and lock_clfr:
                self.immutable_labels.add(label)

        except ValueError as e:
            if e.args[0].find('JSON') > -1:
                return make_response_json("Tried to parse malformed JSON in "
                                          "form argument.", 400)
            return make_response_json("Duplicate label ('%s') added during "
                                      "classifier training of provided IQR "
                                      "session state." % label, 400,
                                      label=label)

        return make_response_json("Finished training IQR-session-based "
                                  "classifier for label '%s'." % label,
                                  201,
                                  label=label)
예제 #32
0
    def test_refine_no_prev_results(self):
        """
        Check that ``refine`` fills in ``results`` straight from the
        RelevancyIndex ranking when the session has no results yet.

        Elements that were also adjudicated must keep whatever probability
        the ranking returned for them. An earlier state of the IQR Session
        structure forced the returned probability of descriptor elements
        found in the positive or negative adjudication sets (internal or
        external) to certain values; this test guards against that.
        """
        session = IqrSession()
        # The relevancy index is mocked so that we control its return value
        # and can inspect how it was invoked; no rel_index config is needed.
        ranker = mock.MagicMock(spec=RelevancyIndex)
        session.rel_index = ranker
        # A non-zero length makes the mocked index look populated.
        ranker.__len__.return_value = 1

        # Five distinct descriptors with UIDs/vectors 0..4, in the order:
        # internal positive, internal negative, external positive,
        # external negative, and one never-adjudicated element.
        in_pos, in_neg, ex_pos, ex_neg, other = (
            DescriptorMemoryElement('t', i).set_vector([i]) for i in range(5)
        )

        # Elements the mocked ranking reports on. The exact probability is
        # unimportant as long as it is neither 1.0 nor 0.0.
        ranked = (in_pos, in_neg, other)
        ranker.rank.return_value = dict.fromkeys(ranked, 0.5)

        # Pre-condition: a fresh session carries no results.
        assert session.results is None

        # Register external and internally adjudicated positives/negatives.
        session.external_descriptors(positive=[ex_pos], negative=[ex_neg])
        session.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])

        # Method under test.
        session.refine()

        # ``rank`` must have been called exactly once, with the union of the
        # external and adjudicated sets for each polarity.
        ranker.rank.assert_called_once_with(
            {in_pos, ex_pos},
            {in_neg, ex_neg},
        )

        # ``results`` is now a mapping holding exactly the ranked elements...
        assert session.results is not None
        assert len(session.results) == 3
        for elem in (other, in_pos, in_neg):
            assert elem in session.results

        # ...each carrying the unmodified probability from the ranking.
        for elem in (other, in_pos, in_neg):
            assert session.results[elem] == 0.5
예제 #33
0
    def test_refine_with_prev_results(self):
        """
        Check that ``refine`` updates ``results`` from the RelevancyIndex
        ranking when the session already holds a results dictionary.

        Probabilities from the new ranking must overwrite the previous ones,
        and keys that are absent from the new ranking must not survive.
        """
        session = IqrSession()
        # The relevancy index is mocked so that we control its return value
        # and can inspect how it was invoked; no rel_index config is needed.
        ranker = mock.MagicMock(spec=RelevancyIndex)
        session.rel_index = ranker
        # A non-zero length makes the mocked index look populated.
        ranker.__len__.return_value = 1

        # Five distinct descriptors with UIDs/vectors 0..4, in the order:
        # internal positive, internal negative, external positive,
        # external negative, and one never-adjudicated element.
        in_pos, in_neg, ex_pos, ex_neg, other = (
            DescriptorMemoryElement('t', i).set_vector([i]) for i in range(5)
        )

        # Elements the mocked ranking reports on. The exact probability is
        # unimportant as long as it is neither 1.0 nor 0.0.
        ranked = (in_pos, in_neg, other)
        ranker.rank.return_value = dict.fromkeys(ranked, 0.5)

        # Seed a "previous" results mapping over the same working set, plus
        # one unrelated key that the new ranking does not mention.
        previous = dict.fromkeys(ranked, 0.2)
        previous['something else'] = 0.3
        session.results = previous

        # Register external and internally adjudicated positives/negatives.
        session.external_descriptors(positive=[ex_pos], negative=[ex_neg])
        session.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])

        # Method under test.
        session.refine()

        # ``rank`` must have been called exactly once, with the union of the
        # external and adjudicated sets for each polarity.
        ranker.rank.assert_called_once_with(
            {in_pos, ex_pos},
            {in_neg, ex_neg},
        )

        # ``results`` holds exactly the ranked elements; the stale key from
        # the previous mapping is gone.
        assert session.results is not None
        assert len(session.results) == 3
        for elem in (other, in_pos, in_neg):
            assert elem in session.results
        assert 'something else' not in session.results

        # Old 0.2 values have been replaced by the new ranking's values.
        for elem in (other, in_pos, in_neg):
            assert session.results[elem] == 0.5