def test_get_negative_adjudication_relevancy_no_cache_no_results(self):
    """
    Check that ``get_negative_adjudication_relevancy`` yields an empty list
    on a freshly constructed (pre-refine) session that has no negative
    adjudications.
    """
    session = IqrSession()
    relevancy = session.get_negative_adjudication_relevancy()
    assert relevancy == []
def test_get_unadjudicated_relevancy_no_cache_no_results(self):
    """
    Check that ``get_unadjudicated_relevancy`` yields an empty list on a
    freshly constructed (pre-refine) session that has no results state.
    """
    session = IqrSession()
    relevancy = session.get_unadjudicated_relevancy()
    assert relevancy == []
def test_adjudicate_remove_pos_neg(self):
    """
    Check that the ``un_positives`` / ``un_negatives`` parameters of
    ``adjudicate`` remove descriptors from the positive and negative sets.
    """
    session = IqrSession()

    # Five distinct descriptors: three positives followed by two negatives.
    pos_a, pos_b, pos_c, neg_a, neg_b = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
    )

    # Seed the adjudication state directly.
    session.positive_descriptors = {pos_a, pos_b, pos_c}
    session.negative_descriptors = {neg_a, neg_b}

    # Remove a single positive; negatives must be untouched.
    session.adjudicate(un_positives=[pos_b])
    assert session.positive_descriptors == {pos_a, pos_c}
    assert session.negative_descriptors == {neg_a, neg_b}

    # Remove a single negative; positives must be untouched.
    session.adjudicate(un_negatives=[neg_a])
    assert session.positive_descriptors == {pos_a, pos_c}
    assert session.negative_descriptors == {neg_b}

    # Remove everything that remains in one call.
    session.adjudicate(un_positives=[pos_a, pos_c], un_negatives=[neg_b])
    assert session.positive_descriptors == set()
    assert session.negative_descriptors == set()
def test_adjudication_switch(self):
    """
    Check that adjudicating an already-adjudicated descriptor with the
    opposite label moves it between the positive and negative sets.
    """
    session = IqrSession()

    # Initially: first three are positive, last two are negative.
    was_pos_0, was_pos_1, was_pos_2, was_neg_3, was_neg_4 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
    )
    session.positive_descriptors = {was_pos_0, was_pos_1, was_pos_2}
    session.negative_descriptors = {was_neg_3, was_neg_4}

    # Flip one negative to positive.
    session.adjudicate(new_positives=[was_neg_3])
    assert session.positive_descriptors == {
        was_pos_0, was_pos_1, was_pos_2, was_neg_3,
    }
    assert session.negative_descriptors == {was_neg_4}

    # Flip one positive to negative.
    session.adjudicate(new_negatives=[was_pos_1])
    assert session.positive_descriptors == {was_pos_0, was_pos_2, was_neg_3}
    assert session.negative_descriptors == {was_neg_4, was_pos_1}

    # Flip all remaining descriptors in a single call.
    session.adjudicate(
        new_positives=[was_neg_4],
        new_negatives=[was_pos_0, was_pos_2],
    )
    assert session.positive_descriptors == {was_neg_3, was_neg_4}
    assert session.negative_descriptors == {was_pos_0, was_pos_1, was_pos_2}
def test_ordered_results_no_results_no_cache(self):
    """
    Check that ``ordered_results`` yields an empty list on a freshly
    constructed session, i.e. before any refinement.
    """
    session = IqrSession()
    ordered = session.ordered_results()
    assert ordered == []
def test_refine_no_rel_index(self):
    """
    Check that ``refine`` raises a ``RuntimeError`` mentioning the missing
    relevancy index when called on a freshly constructed session.
    """
    session = IqrSession()
    expect_failure = pytest.raises(RuntimeError,
                                   match="No relevancy index yet")
    with expect_failure:
        session.refine()
def test_get_unadjudicated_relevancy_has_cache(self):
    """
    Check that, when the non-adjudicated cache is populated, the getter
    returns a list equal to the cache but not the cached list object itself.
    """
    session = IqrSession()
    # Stand-in values; only list equality and identity are inspected.
    session._ordered_non_adj = ['simulation', 'cache']

    returned = session.get_unadjudicated_relevancy()

    assert returned == ['simulation', 'cache']
    assert returned is not session._ordered_non_adj
def test_ordered_results_has_cache(self):
    """
    Check that, when the positive-adjudication cache is populated,
    ``get_positive_adjudication_relevancy`` returns a list equal to the
    cache but not the cached list object itself.
    """
    session = IqrSession()
    # Stand-in cache contents; only list equality and identity are inspected.
    session._ordered_pos = ['simulated', 'cache']

    returned = session.get_positive_adjudication_relevancy()

    assert returned == session._ordered_pos
    assert returned is not session._ordered_pos
def test_reset_result_cache_invalidation(self):
    """
    Check that ``reset`` sets each of the three result-view cache
    attributes back to ``None``.
    """
    session = IqrSession()

    # Any non-None placeholder is enough to observe the reset.
    session._ordered_pos = True
    session._ordered_neg = True
    session._ordered_non_adj = True

    session.reset()

    for cache_attr in ('_ordered_pos', '_ordered_neg', '_ordered_non_adj'):
        assert getattr(session, cache_attr) is None
def test_get_session_info(self):
    """
    Exercise the ``/session`` getter against a session that holds both
    current adjudications and a differing set of "last refine"
    contributing adjudications.
    """
    ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
    session = IqrSession(ranker, session_uid='abc')

    # One descriptor per single-character UID, in this order:
    #   a=ext_pos b=ext_neg c=pos1 d=pos2 e=pos3
    #   f=neg1 g=neg2 h=neutral1 i=neutral2 j=neg3
    (ext_pos, ext_neg, pos1, pos2, pos3,
     neg1, neg2, neutral1, neutral2, neg3) = [
        DescriptorMemoryElement('test', uid) for uid in 'abcdefghij'
    ]

    # Current adjudications.
    session.external_positive_descriptors = {ext_pos}
    session.positive_descriptors = {pos1, pos2, pos3}
    session.external_negative_descriptors = {ext_neg}
    session.negative_descriptors = {neg1, neg2, neg3}

    # "Last refine" contributing adjudications.  The currently neutral
    # descriptors are simulated as having been adjudicated previously.
    session.rank_contrib_pos = {pos2, neutral1}
    session.rank_contrib_pos_ext = {ext_pos}
    session.rank_contrib_neg = {neg1, neg3, neutral2}
    session.rank_contrib_neg_ext = set()

    # Populate the working set with every non-external descriptor.
    session.working_set.add_many_descriptors(
        [pos1, pos2, pos3, neg1, neg2, neutral1, neutral2, neg3]
    )

    self.app.controller.add_session(session)

    with self.app.test_client() as client:
        #: :type: flask.wrappers.Response
        response = client.get('/session?sid=abc')
        self.assertStatusCode(response, 200)
        payload = response.json
        assert payload['sid'] == 'abc'

        # Keys are checked in insertion order: the "current"
        # adjudications first, then the "contributing" ones.
        expected_uuid_sets = {
            'uuids_pos_ext': {'a'},
            'uuids_pos': {'c', 'd', 'e'},
            'uuids_neg_ext': {'b'},
            'uuids_neg': {'f', 'g', 'j'},
            'uuids_pos_in_model': {'d', 'h'},
            'uuids_pos_ext_in_model': {'a'},
            'uuids_neg_in_model': {'f', 'j', 'i'},
            'uuids_neg_ext_in_model': set(),
        }
        for key, expected in expected_uuid_sets.items():
            assert set(payload[key]) == expected

        # Eight descriptors were added to the working set above.
        assert payload['wi_count'] == 8
def test_adjudicate_unadj_noeffect(self):
    """
    Check that neither an argument-less ``adjudicate`` call nor
    un-adjudicating a descriptor that carries no adjudication changes the
    positive/negative sets.
    """
    session = IqrSession()

    # Three positives, two negatives, and one never-adjudicated element.
    pos_a, pos_b, pos_c, neg_a, neg_b, stranger = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(6)
    )
    session.positive_descriptors = {pos_a, pos_b, pos_c}
    session.negative_descriptors = {neg_a, neg_b}

    # No-argument call.
    session.adjudicate()
    assert session.positive_descriptors == {pos_a, pos_b, pos_c}
    assert session.negative_descriptors == {neg_a, neg_b}

    # Un-adjudicate an element that is in neither set.
    session.adjudicate(un_positives=[stranger], un_negatives=[stranger])
    assert session.positive_descriptors == {pos_a, pos_b, pos_c}
    assert session.negative_descriptors == {neg_a, neg_b}
def test_adjudicate_add_duplicates(self):
    """
    Check that re-adding descriptors that already carry the same
    adjudication leaves the positive/negative sets unchanged (set
    semantics).
    """
    session = IqrSession()

    # Element with UID 1 is the negative; UIDs 0, 2, 3 are positives and 4
    # is the second negative.
    pos_0, neg_1, pos_2, pos_3, neg_4 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
    )

    # Add only a subset first.
    session.adjudicate(new_positives=[pos_0], new_negatives=[neg_1])
    assert session.positive_descriptors == {pos_0}
    assert session.negative_descriptors == {neg_1}

    # Add everything; the already-present elements must not be duplicated.
    full_positives = [pos_0, pos_2, pos_3]
    full_negatives = [neg_1, neg_4]
    session.adjudicate(new_positives=full_positives,
                       new_negatives=full_negatives)
    assert session.positive_descriptors == {pos_0, pos_2, pos_3}
    assert session.negative_descriptors == {neg_1, neg_4}

    # Repeat the exact same call; nothing new is added and nothing breaks.
    session.adjudicate(new_positives=full_positives,
                       new_negatives=full_negatives)
    assert session.positive_descriptors == {pos_0, pos_2, pos_3}
    assert session.negative_descriptors == {neg_1, neg_4}
def test_adjudicate_combined_remove_unadj(self):
    """
    Check a single ``adjudicate`` call that simultaneously adds a new
    descriptor, switches an existing one to the opposite label, and
    un-adjudicates another.
    """
    session = IqrSession()

    # UIDs 0-2 start positive, 3-4 start negative; 5 and 6 are introduced
    # later as a new positive and a new negative respectively.
    pos_0, pos_1, pos_2, neg_3, neg_4, new_pos_5, new_neg_6 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(7)
    )
    session.positive_descriptors = {pos_0, pos_1, pos_2}
    session.negative_descriptors = {neg_3, neg_4}

    # Add 5 as positive, flip 1 to negative, un-adjudicate 2.
    session.adjudicate(
        new_positives=[new_pos_5],
        new_negatives=[pos_1],
        un_positives=[pos_2],
    )
    assert session.positive_descriptors == {pos_0, new_pos_5}
    assert session.negative_descriptors == {neg_3, neg_4, pos_1}

    # Add 6 as negative, flip 4 to positive, un-adjudicate 3.
    session.adjudicate(
        new_positives=[neg_4],
        new_negatives=[new_neg_6],
        un_negatives=[neg_3],
    )
    assert session.positive_descriptors == {pos_0, new_pos_5, neg_4}
    assert session.negative_descriptors == {pos_1, new_neg_6}
def test_refine_no_pos(self):
    """
    Check that ``refine`` raises a ``RuntimeError`` about missing positive
    adjudications when a relevancy index is present but no positive
    (adjudicated or external) descriptors have been set.
    """
    session = IqrSession()

    # Stand-in relevancy index that reports a non-zero length, simulating
    # an index with contents.
    session.rel_index = mock.MagicMock(spec=RelevancyIndex)
    session.rel_index.__len__.return_value = 1

    expected_message = ('Did not find at least one '
                        'positive adjudication')
    with pytest.raises(RuntimeError, match=expected_message):
        session.refine()
def setup_method(cls):
    """
    Attach a fresh ``IqrSession``, constructed with a mocked
    ``RankRelevancyWithFeedback``, as the ``iqrs`` attribute.
    """
    # NOTE(review): pytest invokes ``setup_method`` on the test instance, so
    # ``cls`` is presumably that instance rather than the class -- confirm
    # before relying on ``iqrs`` being shared across tests.
    mocked_ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
    cls.iqrs = IqrSession(mocked_ranker)
def train_classifier_iqr(config, iqr_state_fp):
    """
    Train a supervised classifier from the adjudications stored in a saved
    IQR session state file.

    :param config: Configuration dictionary; its ``'classifier'`` entry is
        used to construct the classifier instance.
    :param iqr_state_fp: Path to the file holding the IQR session state
        bytes.
    """
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_config_dict(config['classifier'],
                                  SupervisedClassifier.get_impls())

    # Read the serialized state, dropping surrounding whitespace bytes.
    with open(iqr_state_fp, 'rb') as state_file:
        raw_state = state_file.read()
    state_bytes = raw_state.strip()

    # Restore the state into an otherwise empty session.
    element_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    session = IqrSession()
    session.set_state_bytes(state_bytes, element_factory)

    # Training examples per class are the union of the adjudicated and the
    # external descriptor sets.
    examples = {
        'positive': (session.positive_descriptors
                     | session.external_positive_descriptors),
        'negative': (session.negative_descriptors
                     | session.external_negative_descriptors),
    }
    classifier.train(class_examples=examples)
def test_ordered_results_has_results_post_reset(self):
    """
    Check that ``ordered_results`` yields an empty list after ``reset``,
    even though results were present (and had been queried) beforehand.
    """
    session = IqrSession()

    # Simulate an existing results map: descriptor -> score.
    descriptors = [
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
    ]
    session.results = dict(zip(descriptors, (0.0, 0.8, 0.2, 0.4)))

    # Before the reset there is something to return.
    assert session.ordered_results() is not None

    session.reset()

    # After the reset neither results nor a cached view should remain.
    post_reset = session.ordered_results()
    assert post_reset == []
def train_classifier_iqr(config, iqr_state_fp):
    """
    Train a supervised classifier from the adjudications stored in a saved
    IQR session state file.

    :param config: Configuration dictionary; its ``'classifier'`` entry is
        the plugin configuration used to construct the classifier.
    :param iqr_state_fp: Path to the file holding the IQR session state
        bytes.
    """
    supervised_impls = get_classifier_impls(
        sub_interface=SupervisedClassifier
    )
    #: :type: smqtk.algorithms.SupervisedClassifier
    classifier = from_plugin_config(config['classifier'], supervised_impls)

    # Read the serialized state, dropping surrounding whitespace bytes.
    with open(iqr_state_fp, 'rb') as state_file:
        raw_state = state_file.read()
    state_bytes = raw_state.strip()

    # Restore the state into an otherwise empty session.
    element_factory = DescriptorElementFactory(DescriptorMemoryElement, {})
    session = IqrSession()
    session.set_state_bytes(state_bytes, element_factory)

    # Training examples per class are the union of the adjudicated and the
    # external descriptor sets.
    examples = {
        'positive': (session.positive_descriptors
                     | session.external_positive_descriptors),
        'negative': (session.negative_descriptors
                     | session.external_negative_descriptors),
    }
    classifier.train(class_examples=examples)
def test_get_session_info_invalid_session_id(self):
    """
    Check that the ``session`` getter reports an unknown session ID as not
    found, both when no session is registered and when only a session with
    a different ID is registered.
    """
    # NOTE(review): the original docstring claimed a 400 response; the
    # status actually asserted lives in ``_test_getter_sid_not_found`` --
    # confirm there.

    # Case 1: nothing has been registered with the controller.
    self._test_getter_sid_not_found('session')

    # Case 2: a session exists, but under ID '1' rather than the '0' that
    # (per the original note) the helper queries for.
    unrelated_session = IqrSession(session_uid='1')
    self.app.controller.add_session(unrelated_session)
    self._test_getter_sid_not_found('session')
def test_ordered_results_has_results_no_cache(self):
    """
    Check that ``ordered_results`` returns the results map as a list of
    ``(descriptor, score)`` pairs in descending score order, that the
    ordering is cached after the first call, and that each call returns a
    distinct list object.
    """
    session = IqrSession()

    # Simulate an existing results map: descriptor -> score.
    d0, d1, d2, d3 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
    )
    session.results = dict(zip((d0, d1, d2, d3), (0.0, 0.8, 0.2, 0.4)))

    # Nothing has been cached yet.
    assert session._ordered_results is None

    # First call: ``sorted`` is wrapped so the real sort still happens while
    # the invocation is recorded.
    with mock.patch('smqtk.iqr.iqr_session.sorted',
                    side_effect=sorted) as sorted_spy:
        first = session.ordered_results()
        sorted_spy.assert_called_once()

    expected = [(d1, 0.8), (d3, 0.4), (d2, 0.2), (d0, 0.0)]
    assert first == expected

    # Second call: the cached ordering must be used, so ``sorted`` is never
    # invoked.
    with mock.patch('smqtk.iqr.iqr_session.sorted') as sorted_spy:
        second = session.ordered_results()
        sorted_spy.assert_not_called()

    assert second == expected

    # Each call hands back its own list (shallow copies of the cache).
    assert first is not second
def test_add_iqr_state_classifier_simple(self):
    """
    POST a minimal serialized IQR session (one positive, one negative) to
    the ``/iqr_classifier`` endpoint and check the response, the training
    call, and the resulting classifier collection labels.
    """
    # Minimal session: one positive and one negative adjudication.
    session = IqrSession(session_uid=str("0"))
    positive = DescriptorMemoryElement('test', 0).set_vector([0])
    negative = DescriptorMemoryElement('test', 1).set_vector([1])
    session.adjudicate(new_positives=[positive], new_negatives=[negative])

    state_b64 = base64.b64encode(session.get_state_bytes())
    test_label = 'test-label-08976azsdv'
    form_data = {
        'bytes_b64': state_b64,
        'label': test_label,
    }

    train_target = STUB_CLASSIFIER_MOD_PATH \
        + ".DummySupervisedClassifier._train"
    with mock.patch(train_target) as train_mock, \
            self.app.test_client() as client:
        response = client.post('/iqr_classifier', data=form_data)
        self.assertStatus(response, 201)
        self.assertResponseMessageRegex(
            response,
            "Finished training "
            "IQR-session-based "
            "classifier for label "
            "'%s'." % test_label
        )

        # Training must have received exactly the adjudicated descriptors.
        train_mock.assert_called_once_with({
            'positive': {positive},
            'negative': {negative},
        })

        # The collection now holds the initial dummy classifier plus the
        # newly trained one.
        labels = self.app.classifier_collection.labels()
        self.assertEqual(len(labels), 2)
        self.assertIn(test_label, self.app.classifier_collection.labels())
def test_get_session_info_invalid_session_id(self):
    """
    Check that the ``session`` getter reports an unknown session ID as not
    found, both when no session is registered and when only a session with
    a different ID is registered.
    """
    # NOTE(review): the original docstring claimed a 400 response; the
    # status actually asserted lives in ``_test_getter_sid_not_found`` --
    # confirm there.

    # Case 1: nothing has been registered with the controller.
    self._test_getter_sid_not_found('session')

    # Case 2: a session exists, but under ID '1' rather than the '0' that
    # (per the original note) the helper queries for.
    mocked_ranker = mock.MagicMock(spec=RankRelevancyWithFeedback)
    unrelated_session = IqrSession(mocked_ranker, session_uid='1')
    self.app.controller.add_session(unrelated_session)
    self._test_getter_sid_not_found('session')
def test_add_iqr_state_classifier_simple(self):
    """
    POST a minimal serialized IQR session (one positive, one negative) to
    the ``/iqr_classifier`` endpoint and check the response, the training
    call, and the resulting classifier collection labels.
    """
    # Minimal session: one positive and one negative adjudication.
    session = IqrSession(session_uid=str("0"))
    positive = DescriptorMemoryElement('test', 0).set_vector([0])
    negative = DescriptorMemoryElement('test', 1).set_vector([1])
    session.adjudicate(new_positives=[positive], new_negatives=[negative])

    state_b64 = base64.b64encode(session.get_state_bytes())
    test_label = 'test-label-08976azsdv'
    expected_examples = {'positive': {positive}, 'negative': {negative}}

    train_target = STUB_CLASSIFIER_MOD_PATH \
        + ".DummySupervisedClassifier._train"
    with mock.patch(train_target) as train_mock, \
            self.app.test_client() as client:
        response = client.post('/iqr_classifier', data={
            'bytes_b64': state_b64,
            'label': test_label,
        })
        self.assertStatus(response, 201)
        self.assertResponseMessageRegex(
            response,
            "Finished training "
            "IQR-session-based "
            "classifier for label "
            "'%s'." % test_label
        )

        # Training must have received exactly the adjudicated descriptors.
        train_mock.assert_called_once_with(expected_examples)

        # The collection now holds the initial dummy classifier plus the
        # newly trained one.
        labels = self.app.classifier_collection.labels()
        self.assertEqual(len(labels), 2)
        self.assertIn(test_label, self.app.classifier_collection.labels())
def test_get_unadjudicated_relevancy_no_cache_has_results(self):
    """
    Check that ``get_unadjudicated_relevancy`` returns only the descriptors
    that are not in the contributing positive/negative sets, as
    ``(descriptor, score)`` pairs in descending score order, and that the
    ordering is cached after the first call.
    """
    session = IqrSession()
    d0, d1, d2, d3 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(4)
    )

    # Simulated contributing adjudications (a post-refine state needs at
    # least one positive to be valid, per the original note).
    session.rank_contrib_pos = {d1}
    session.rank_contrib_neg = {d0}

    # Simulated post-refine results map: descriptor -> score.
    session.results = dict(zip((d0, d1, d2, d3), (0.1, 0.8, 0.2, 0.4)))

    # Nothing has been cached yet.
    assert session._ordered_non_adj is None

    # First call: ``sorted`` is wrapped so the real sort still happens while
    # the invocation is recorded.
    with mock.patch('smqtk.iqr.iqr_session.sorted',
                    side_effect=sorted) as sorted_spy:
        first = session.get_unadjudicated_relevancy()
        sorted_spy.assert_called_once()

    expected = [(d3, 0.4), (d2, 0.2)]
    assert first == expected

    # Second call: the cached ordering must be used, so ``sorted`` is never
    # invoked.
    with mock.patch('smqtk.iqr.iqr_session.sorted',
                    side_effect=sorted) as sorted_spy:
        second = session.get_unadjudicated_relevancy()
        sorted_spy.assert_not_called()

    assert second == expected

    # Each call hands back its own list (shallow copies of the cache).
    assert first is not second
def test_adjudicate_cache_resetting_negative(self):
    """
    Check which result-view caches are invalidated when a new negative
    adjudication is added.
    """
    descriptor = DescriptorMemoryElement('', 0).set_vector([0])
    session = IqrSession()

    # Any non-None placeholder is enough to observe invalidation.
    for cache_attr in ('_ordered_pos', '_ordered_neg', '_ordered_non_adj'):
        setattr(session, cache_attr, True)

    # Adding a negative adjudication should reset the negative and
    # non-adjudicated caches, and leave the positive cache alone.
    session.adjudicate(new_negatives=[descriptor])

    assert session._ordered_pos is True  # NOT reset
    assert session._ordered_neg is None  # reset
    assert session._ordered_non_adj is None  # reset
def get_current_iqr_session(self):
    """
    Get the IQR session associated with the current flask session ID,
    creating and registering one first if the controller does not have it.

    Creating a session also records and creates its work directory and
    initializes its empty example-data and example-positive-descriptor
    maps.

    :return: The session the controller holds for the current flask
        session ID.
    :rtype: IqrSession
    """
    controller = self._iqr_controller
    with controller:
        sid = flask.session.sid

        # Fast path: the session already exists.
        if controller.has_session_uuid(sid):
            return controller.get_session(sid)

        new_session = IqrSession(self._pos_seed_neighbors,
                                 self._rel_index_config, sid)
        controller.add_session(new_session, sid)

        # Per-session bookkeeping is keyed on the session's own UUID.
        self._iqr_work_dirs[new_session.uuid] = osp.join(self.work_dir, sid)
        safe_create_dir(self._iqr_work_dirs[new_session.uuid])
        self._iqr_example_data[new_session.uuid] = {}
        self._iqr_example_pos_descr[new_session.uuid] = {}

        return controller.get_session(sid)
def get_current_iqr_session(self):
    """
    Get the IQR session associated with the current flask session ID,
    creating and registering one first if the controller does not have it.

    :return: The session the controller holds for the current flask
        session ID.
    :rtype: IqrSession
    """
    controller = self._iqr_controller
    with controller:
        sid = flask.session.sid

        # Fast path: the session already exists.
        if controller.has_session_uuid(sid):
            return controller.get_session(sid)

        # The new session gets a work directory named after the session ID
        # under this application's work directory.
        session_work_dir = osp.join(self.work_dir, sid)
        new_session = IqrSession(
            session_work_dir,
            self._descriptor_generator,
            self._nn_index,
            self._pos_seed_neighbors,
            self._rel_index_config,
            self._descr_elem_factory,
            sid,
        )
        controller.add_session(new_session, sid)

        return controller.get_session(sid)
def test_adjudicate_new_pos_neg(self):
    """
    Check that iterables given to ``new_positives`` / ``new_negatives``
    are added to the positive and negative sets respectively.
    """
    session = IqrSession()
    pos_0, neg_1, pos_2, pos_3, neg_4 = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(5)
    )

    # A single positive.
    session.adjudicate(new_positives=[pos_0])
    assert session.positive_descriptors == {pos_0}
    assert session.negative_descriptors == set()

    # A single negative.
    session.adjudicate(new_negatives=[neg_1])
    assert session.positive_descriptors == {pos_0}
    assert session.negative_descriptors == {neg_1}

    # Several of both in one call.
    session.adjudicate(new_positives=[pos_2, pos_3], new_negatives=[neg_4])
    assert session.positive_descriptors == {pos_0, pos_2, pos_3}
    assert session.negative_descriptors == {neg_1, neg_4}
def createSession(self, params):
    """
    Create an item representing a new IQR session and register a matching
    ``IqrSession`` with the controller.

    :param params: Request parameters; ``params['smqtkFolder']`` is the ID
        of the SMQTK folder the session is based on.
    :return: The created session item.
    """
    smqtkFolder = params['smqtkFolder']
    sessionsFolder = getCreateSessionsFolder()

    # The parent of the SMQTK folder is recorded as the data folder.  Per
    # the original note this is the folder holding the images, and is what
    # determines which descriptor set table to use.
    smqtkFolderDoc = ModelImporter.model('folder').load(
        ObjectId(smqtkFolder), user=getCurrentUser())
    dataFolderId = str(smqtkFolderDoc['parentId'])

    # Create the item under a placeholder name, then rename it to its own
    # ID once that is known.
    itemModel = ModelImporter.model('item')
    session = itemModel.createItem(
        'placeholder_name', getCurrentUser(), sessionsFolder)
    sessionId = str(session['_id'])
    session['name'] = sessionId
    itemModel.save(session)

    sessionMetadata = {
        'smqtk_folder_id': smqtkFolder,
        'data_folder_id': dataFolderId,
        'pos_uuids': [],
        'neg_uuids': [],
    }
    itemModel.setMetadata(session, sessionMetadata)

    # Only register with the controller if it does not know this ID yet.
    if not self.controller.has_session_uuid(sessionId):
        iqrs = IqrSession(self.positive_seed_neighbors,
                          session_uid=sessionId)
        with self.controller:
            with iqrs:  # because classifier maps locked by session
                self.controller.add_session(iqrs)
                self.session_classifiers[sessionId] = None
                self.session_classifier_dirty[sessionId] = True

    return session
def test_adjudicate_both_labels(self):
    """
    Check that passing the same descriptor as both a new positive and a
    new negative in one ``adjudicate`` call leaves the state unchanged.
    """
    session = IqrSession()

    # Three positives, two negatives, and one not-yet-adjudicated element.
    pos_a, pos_b, pos_c, neg_a, neg_b, conflicted = (
        DescriptorMemoryElement('', i).set_vector([i]) for i in range(6)
    )
    session.positive_descriptors = {pos_a, pos_b, pos_c}
    session.negative_descriptors = {neg_a, neg_b}

    # The contradictory adjudication must end up in neither set.
    session.adjudicate(new_positives=[conflicted],
                       new_negatives=[conflicted])
    assert session.positive_descriptors == {pos_a, pos_b, pos_c}
    assert session.negative_descriptors == {neg_a, neg_b}
def add_iqr_state_classifier(self):
    r"""
    Train a classifier based on the user-provided IQR state file bytes in
    a base64 encoding, matched with a descriptive label of that
    classifier's topic.

    Since all IQR session classifiers end up only having two result
    classes (positive and negative), the topic of the classifier is
    encoded in the descriptive label the user applies to the classifier.

    Below is an example call to this endpoint via the ``requests`` python
    module, showing how base64 data is sent::

        import base64
        import requests
        data_bytes = "Load some content bytes here."
        requests.post('http://localhost:5000/iqr_classifier',
                      data={'bytes_b64': base64.b64encode(data_bytes),
                            'label': 'some_label'})

    With curl on the command line::

        $ curl -X POST localhost:5000/iqr_classifier \
            -d "label=some_label" \
            --data-urlencode "bytes_b64=$(base64 -w0 /path/to/file)"

        # If this fails, you may wish to encode the file separately and
        # use the file reference syntax instead:

        $ base64 -w0 /path/to/file > /path/to/file.b64
        $ curl -X POST localhost:5000/iqr_classifier -d label=some_label \
            --data-urlencode bytes_b64@/path/to/file.b64

    To lock this classifier and guard it against deletion, add
    "lock_label=true"::

        $ curl -X POST localhost:5000/iqr_classifier \
            -d "label=some_label" \
            -d "lock_label=true" \
            --data-urlencode "bytes_b64=$(base64 -w0 /path/to/file)"

    Form arguments:
        bytes_b64
            base64 encoding of the bytes of the IQR session state save
            file.
        label
            Descriptive label to apply to this classifier. This should not
            conflict with existing classifier labels.
        lock_label
            If 'true', disallow deletion of this label. If 'false', allow
            deletion of this label. Only has an effect if deletion is
            enabled for this service. (Default: 'false')

    Returns 201 on success, or 400 when an argument is missing or invalid
    or when the label already exists.

    """
    data_b64 = flask.request.values.get('bytes_b64', default=None)
    label = flask.request.values.get('label', default=None)
    lock_clfr_str = flask.request.values.get('lock_label',
                                             default='false')

    # Both the state data and the label are required and must be non-empty.
    if not data_b64:
        return make_response_json("No state base64 data provided.", 400)
    elif not label:
        return make_response_json("No descriptive label provided.", 400)

    # ``lock_label`` is parsed as JSON so that 'true'/'false' map to bools.
    try:
        lock_clfr = bool(flask.json.loads(lock_clfr_str))
    except JSON_DECODE_EXCEPTION:
        return make_response_json("Invalid boolean value for"
                                  " 'lock_label'. Was given: '%s'"
                                  % lock_clfr_str,
                                  400)

    try:
        # Using urlsafe version because it handles both regular and urlsafe
        # alphabets.
        data_bytes = base64.urlsafe_b64decode(data_b64.encode('utf-8'))
    except (TypeError, binascii.Error) as ex:
        # The status code is passed to the helper, like every other error
        # return here, rather than being tupled onto the helper's result.
        return make_response_json("Invalid base64 input: %s" % str(ex),
                                  400)

    # If the given label conflicts with one already in the collection,
    # fail.
    if label in self.classifier_collection.labels():
        return make_response_json(
            "Label already exists in classifier collection.", 400)

    # Create dummy IqrSession to extract pos/neg descriptors.
    iqrs = IqrSession()
    iqrs.set_state_bytes(data_bytes, self.descriptor_factory)
    pos = iqrs.positive_descriptors | iqrs.external_positive_descriptors
    neg = iqrs.negative_descriptors | iqrs.external_negative_descriptors
    del iqrs

    # Make a classifier instance from the stored config for IQR
    # session-based classifiers.
    #: :type: SupervisedClassifier
    classifier = smqtk.utils.plugin.from_plugin_config(
        self.iqr_state_classifier_config,
        get_classifier_impls(sub_interface=SupervisedClassifier)
    )
    classifier.train(class_examples={'positive': pos, 'negative': neg})

    try:
        self.classifier_collection.add_classifier(label, classifier)

        # If we're allowing deletions, get the lock flag from the form and
        # set it for this classifier
        if self.enable_classifier_removal and lock_clfr:
            self.immutable_labels.add(label)

    except ValueError as e:
        # Guard against argument-less or non-string exception payloads
        # before inspecting the message text.
        if e.args and 'JSON' in str(e.args[0]):
            return make_response_json("Tried to parse malformed JSON in "
                                      "form argument.", 400)
        return make_response_json("Duplicate label ('%s') added during "
                                  "classifier training of provided IQR "
                                  "session state." % label, 400,
                                  label=label)

    return make_response_json("Finished training IQR-session-based "
                              "classifier for label '%s'." % label,
                              201,
                              label=label)
def test_refine_no_prev_results(self):
    """
    Check that, starting with no results, ``refine`` stores exactly what
    the relevancy index's ``rank`` returned -- including the scores of
    elements that were themselves adjudicated.

    Per the original author's note, an earlier ``IqrSession`` design forced
    the probabilities of adjudicated (internal or external) elements to
    fixed values; this test guards against that behavior.
    """
    # No rel_index config is needed because the index is mocked.
    session = IqrSession()
    session.rel_index = mock.MagicMock(spec=RelevancyIndex)
    # Report a non-zero length to simulate an index with contents.
    session.rel_index.__len__.return_value = 1

    # UIDs 0/1: adjudicated pos/neg; 2/3: external pos/neg; 4: unrelated.
    in_pos, in_neg, ex_pos, ex_neg, other = (
        DescriptorMemoryElement('t', i).set_vector([i]) for i in range(5)
    )

    # Mocked ranking.  The exact score is unimportant as long as it is
    # neither 1.0 nor 0.0.
    ranked_elems = [in_pos, in_neg, other]
    session.rel_index.rank.return_value = dict.fromkeys(ranked_elems, 0.5)

    # Pre-condition: there are no results yet.
    assert session.results is None

    # Set up external and adjudicated positives/negatives, then refine.
    session.external_descriptors(positive=[ex_pos], negative=[ex_neg])
    session.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])
    session.refine()

    # ``rank`` must receive the union of adjudicated and external
    # descriptors for each label.
    session.rel_index.rank.assert_called_once_with(
        {in_pos, ex_pos},
        {in_neg, ex_neg},
    )

    # ``results`` must now be a mapping holding exactly the mocked ranking.
    assert session.results is not None
    assert len(session.results) == 3
    for elem in (other, in_pos, in_neg):
        assert elem in session.results
    for elem in (other, in_pos, in_neg):
        assert session.results[elem] == 0.5
def test_refine_with_prev_results(self):
    """
    Check that, starting with an existing results dictionary, ``refine``
    replaces it with exactly what the relevancy index's ``rank`` returned.
    """
    # No rel_index config is needed because the index is mocked.
    session = IqrSession()
    session.rel_index = mock.MagicMock(spec=RelevancyIndex)
    # Report a non-zero length to simulate an index with contents.
    session.rel_index.__len__.return_value = 1

    # UIDs 0/1: adjudicated pos/neg; 2/3: external pos/neg; 4: unrelated.
    in_pos, in_neg, ex_pos, ex_neg, other = (
        DescriptorMemoryElement('t', i).set_vector([i]) for i in range(5)
    )

    # Mocked ranking.  The exact score is unimportant as long as it is
    # neither 1.0 nor 0.0.
    ranked_elems = [in_pos, in_neg, other]
    session.rel_index.rank.return_value = dict.fromkeys(ranked_elems, 0.5)

    # Simulated previous results over the same working set, plus one extra
    # key.  ``refine`` replaces the previous dict, so that disjoint key is
    # NOT expected to be retained.
    previous_results = dict.fromkeys(ranked_elems, 0.2)
    previous_results['something else'] = 0.3
    session.results = previous_results

    # Set up external and adjudicated positives/negatives, then refine.
    session.external_descriptors(positive=[ex_pos], negative=[ex_neg])
    session.adjudicate(new_positives=[in_pos], new_negatives=[in_neg])
    session.refine()

    # ``rank`` must receive the union of adjudicated and external
    # descriptors for each label.
    session.rel_index.rank.assert_called_once_with(
        {in_pos, ex_pos},
        {in_neg, ex_neg},
    )

    # ``results`` must now hold exactly the mocked ranking.
    assert session.results is not None
    assert len(session.results) == 3
    for elem in (other, in_pos, in_neg):
        assert elem in session.results
    assert 'something else' not in session.results
    for elem in (other, in_pos, in_neg):
        assert session.results[elem] == 0.5