Esempio n. 1
0
    def test_maximal_search(self):
        """
        Test that the maximal search is finding different results.

        The search results already stored for the document are captured
        first.  ``cull_overlapping`` is then replaced by a Mock whose side
        effect is ``self.return_value`` (defined outside this block;
        presumably a pass-through -- confirm), the document is analyzed
        again, and the new results are expected to differ from the captured
        ones.
        """
        culled_results = self.ips.client_state_map[
            self.doc_id]['search_results']
        self.ips.cull_overlapping = Mock(side_effect=self.return_value)

        # The third positional argument of the most recent send_response
        # call is parsed as the JSON payload carrying the action list.
        self.ips.process_analyze_document([], [])
        pa_results = json.loads(self.ips.send_response.call_args[0][2])
        actions = pa_results['actions']
        self.assertEqual(actions[0]['action'], 'showProgressbar')

        # Poll until the highlight action arrives, giving up after 100 s.
        # A monotonic clock keeps the timeout immune to wall-clock changes.
        start_time = time.monotonic()
        while actions[0]['action'] != 'highlightText' and (
                time.monotonic() - start_time < 100):
            self.ips.process_analyze_document([], [])
            pa_results = json.loads(self.ips.send_response.call_args[0][2])
            actions = pa_results['actions']
            self.assertIn(actions[0]['action'],
                          ('highlightText', 'updateProgress'))
            time.sleep(0.25)

        # If the loop ended because of the timeout, this reports the action
        # that was actually received instead of "False is not true".
        self.assertEqual(actions[0]['action'], 'highlightText',
                         'Analysis did not finish before the timeout')
        self.assertEqual(actions[1]['action'], 'showSidebar')

        unculled_results = self.ips.client_state_map[
            self.doc_id]['search_results']

        self.assertFalse(
            compare_search_results(unculled_results, culled_results),
            'Search results did not change when cull_overlapping was '
            'replaced')
Esempio n. 2
0
    def test_analyze_basic(self):
        """
        Basic check that the stored client state is as expected.

        Verifies the document id, the search result index, the number of
        search results, and that the search results match the ground truth
        held in ``self.search_gt``.
        """
        client_state = self.ips.client_state_map[self.doc_id]

        # Basic sanity checks
        self.assertEqual(client_state['document_id'], self.doc_id)
        # Integers are compared by value: ``is`` tests object identity, which
        # only happens to hold for small cached ints in CPython and raises a
        # SyntaxWarning when used with a literal.
        self.assertEqual(client_state['search_result_index'], 1)
        self.assertEqual(len(client_state['search_results']),
                         self.expected_search_size)

        self.assertTrue(
            compare_search_results(self.search_gt,
                                   client_state['search_results']),
            'Search result sets do not match!')
Esempio n. 3
0
    def setUp(self):
        """
        Configure an instance of IntentParserServer for spellcheck testing.

        Steps performed, in order:
          * remove any stored link preferences;
          * load the test document JSON from ``self.dataDir``;
          * copy the template spreadsheet and read the sbh credentials from
            ``sbh_creds.json`` next to this file;
          * create and start the server in the background, then replace its
            locks, Google accessor and request/response hooks with mocks;
          * load the item map and drive ``process_analyze_document`` until
            the highlight action is returned;
          * load the pickled ground-truth search results into
            ``self.search_gt``.
        """

        # Clear all link preferences
        link_pref_path = IntentParserServer.LINK_PREF_PATH
        if os.path.exists(link_pref_path):
            for entry in os.listdir(link_pref_path):
                os.remove(os.path.join(link_pref_path, entry))
            os.rmdir(link_pref_path)

        doc_path = os.path.join(self.dataDir, self.spellcheckFile)
        self.doc_content = None
        with open(doc_path, 'r') as fin:
            self.doc_content = json.load(fin)

        if self.doc_content is None:
            self.fail('Failed to read in test document! Path: ' + doc_path)

        self.doc_id = '1xMqOx9zZ7h2BIxSdWp2Vwi672iZ30N_2oPs8rwGUoTA'
        self.user = '******'
        self.user_email = '*****@*****.**'
        self.json_body = {
            'documentId': self.doc_id,
            'user': self.user,
            'userEmail': self.user_email
        }

        self.google_accessor = GoogleAccessor.create()
        self.template_spreadsheet_id = '1r3CIyv75vV7A7ghkB0od-TM_16qSYd-byAbQ1DhRgB0'
        self.spreadsheet_id = self.google_accessor.copy_file(
            file_id=self.template_spreadsheet_id,
            new_title='Intent Parser Server Test Sheet')

        self.sbh_collection_uri = 'https://hub-staging.sd2e.org/user/sd2e/src/intent_parser_collection/1'

        curr_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(curr_path, 'sbh_creds.json'), 'r') as creds_file:
            creds = json.load(creds_file)
        self.sbh_username = creds['username']
        self.sbh_password = creds['password']

        self.ips = IntentParserServer(
            bind_port=8081,
            bind_ip='0.0.0.0',
            sbh_collection_uri=self.sbh_collection_uri,
            spreadsheet_id=self.spreadsheet_id,
            sbh_username=self.sbh_username,
            sbh_password=self.sbh_password)
        self.ips.initialize_server()
        self.ips.start(background=True)

        # Replace locks, external services and request/response plumbing so
        # process_analyze_document can be called directly from the test.
        self.ips.analyze_processing_map_lock = Mock()
        self.ips.client_state_lock = Mock()
        self.ips.client_state_map = {}
        self.ips.google_accessor = Mock()
        self.ips.google_accessor.get_document = Mock(
            return_value=self.doc_content)
        self.ips.send_response = Mock()
        self.ips.get_json_body = Mock(return_value=self.json_body)
        self.ips.analyze_processing_map = {}
        self.ips.analyze_processing_lock = {}

        self.ips.item_map_lock = Mock()
        with open(os.path.join(self.dataDir, self.items_json), 'r') as fin:
            self.ips.item_map = json.load(fin)

        # The third positional argument of the most recent send_response
        # call is parsed as the JSON payload carrying the action list.
        self.ips.process_analyze_document([], [])
        pa_results = json.loads(self.ips.send_response.call_args[0][2])
        actions = pa_results['actions']
        self.assertEqual(actions[0]['action'], 'showProgressbar')

        # Poll until the highlight action arrives, giving up after 100 s.
        # A monotonic clock keeps the timeout immune to wall-clock changes.
        start_time = time.monotonic()
        while actions[0]['action'] != 'highlightText' and (
                time.monotonic() - start_time < 100):
            self.ips.process_analyze_document([], [])
            pa_results = json.loads(self.ips.send_response.call_args[0][2])
            actions = pa_results['actions']
            self.assertIn(actions[0]['action'],
                          ('highlightText', 'updateProgress'))
            time.sleep(0.25)

        self.assertEqual(actions[0]['action'], 'highlightText',
                         'Analysis did not finish before the timeout')
        self.assertEqual(actions[1]['action'], 'showSidebar')

        # Code to generate GT search results, for when test doc is updated
        #with open(os.path.join(self.dataDir, self.searchResults), 'wb') as fout:
        #    pickle.dump(self.ips.client_state_map[self.doc_id]['search_results'], fout)

        # NOTE: pickle must only be used on trusted data; this file is a
        # fixture produced by the commented-out snippet above.
        search_results_path = os.path.join(self.dataDir, self.searchResults)
        self.search_gt = None
        with open(search_results_path, 'rb') as fin:
            self.search_gt = pickle.load(fin)

        if self.search_gt is None:
            # Report the file that was actually opened (self.searchResults).
            self.fail('Failed to read in search results! Path: ' +
                      search_results_path)

        # NOTE(review): the comparison result is discarded here, so a
        # mismatch does not fail setUp -- confirm whether that is intended.
        compare_search_results(
            self.search_gt,
            self.ips.client_state_map[self.doc_id]['search_results'])
Esempio n. 4
0
    def test_maximal_search_ordering(self):
        """
        Check that culling overlapping results works for a problematic order.

        I found that with a certain order, some overlaps weren't being found.
        The issue is that, let's say indices 1&2 and 2&3 overlap.
        If 2 is the maximal, then it will get removed when it overlaps with 1,
        but then won't get considered as overlapping with 3, so 3 won't be
        removed.
        This test case covers that situation.
        """

        # Hand-coded test case based on real data.  I found that the order of
        # the results mattered for culling, so the entries below must stay in
        # exactly this order.
        unculled_input = [{
            'paragraph_index': 5,
            'offset': 24,
            'end_offset': 25,
            'term': 'M9',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/M9/1',
            'link': None,
            'text': 'M9'
        }, {
            'paragraph_index': 167,
            'offset': 0,
            'end_offset': 1,
            'term': 'M9',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/M9/1',
            'link': None,
            'text': 'M9'
        }, {
            'paragraph_index': 168,
            'offset': 0,
            'end_offset': 1,
            'term': 'M9',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/M9/1',
            'link': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'text': 'M9'
        }, {
            'paragraph_index': 5,
            'offset': 24,
            'end_offset': 37,
            'term': 'M9 Media Salts',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'link': None,
            'text': 'M9 media salts'
        }, {
            'paragraph_index': 168,
            'offset': 0,
            'end_offset': 13,
            'term': 'M9 Media Salts',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'link': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'text': 'M9 media salts'
        }, {
            'paragraph_index': 162,
            'offset': 13,
            'end_offset': 17,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 166,
            'offset': 14,
            'end_offset': 18,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 193,
            'offset': 15,
            'end_offset': 19,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 32,
            'offset': 146,
            'end_offset': 155,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 198,
            'offset': 57,
            'end_offset': 66,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 19,
            'end_offset': 28,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 73,
            'end_offset': 82,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 89,
            'end_offset': 98,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 205,
            'offset': 59,
            'end_offset': 68,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 229,
            'offset': 27,
            'end_offset': 36,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 273,
            'offset': 17,
            'end_offset': 26,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 280,
            'offset': 9,
            'end_offset': 18,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 7,
            'offset': 69,
            'end_offset': 78,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 12,
            'offset': 0,
            'end_offset': 9,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 218,
            'offset': 0,
            'end_offset': 9,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 5,
            'offset': 27,
            'end_offset': 31,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 163,
            'offset': 34,
            'end_offset': 38,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 164,
            'offset': 31,
            'end_offset': 35,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 168,
            'offset': 3,
            'end_offset': 7,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'text': 'media'
        }, {
            'paragraph_index': 185,
            'offset': 71,
            'end_offset': 75,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 186,
            'offset': 37,
            'end_offset': 41,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 212,
            'offset': 172,
            'end_offset': 176,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 5,
            'offset': 39,
            'end_offset': 45,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteom'
        }, {
            'paragraph_index': 25,
            'offset': 31,
            'end_offset': 39,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomic'
        }, {
            'paragraph_index': 8,
            'offset': 138,
            'end_offset': 147,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 25,
            'offset': 41,
            'end_offset': 50,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 27,
            'offset': 196,
            'end_offset': 205,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 30,
            'offset': 98,
            'end_offset': 107,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 32,
            'offset': 9,
            'end_offset': 18,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'text': 'proteomics'
        }, {
            'paragraph_index': 126,
            'offset': 41,
            'end_offset': 50,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 158,
            'offset': 58,
            'end_offset': 67,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 5,
            'offset': 4,
            'end_offset': 12,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'roteomics'
        }]

        # Expected result, written in (paragraph_index, offset) order to match
        # the sort applied to the culled output below.  Compared with the
        # input, the four shorter 'M9' / 'media' hits that lie inside the
        # 'M9 media salts' spans of paragraphs 5 and 168 are absent.
        culled_gt = [{
            'paragraph_index': 5,
            'offset': 4,
            'end_offset': 12,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'roteomics'
        }, {
            'paragraph_index': 5,
            'offset': 24,
            'end_offset': 37,
            'term': 'M9 Media Salts',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'link': None,
            'text': 'M9 media salts'
        }, {
            'paragraph_index': 5,
            'offset': 39,
            'end_offset': 45,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteom'
        }, {
            'paragraph_index': 7,
            'offset': 69,
            'end_offset': 78,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 8,
            'offset': 138,
            'end_offset': 147,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 12,
            'offset': 0,
            'end_offset': 9,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 25,
            'offset': 31,
            'end_offset': 39,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomic'
        }, {
            'paragraph_index': 25,
            'offset': 41,
            'end_offset': 50,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 27,
            'offset': 196,
            'end_offset': 205,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 30,
            'offset': 98,
            'end_offset': 107,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 32,
            'offset': 9,
            'end_offset': 18,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'text': 'proteomics'
        }, {
            'paragraph_index': 32,
            'offset': 146,
            'end_offset': 155,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 126,
            'offset': 41,
            'end_offset': 50,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 158,
            'offset': 58,
            'end_offset': 67,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'proteomics'
        }, {
            'paragraph_index': 162,
            'offset': 13,
            'end_offset': 17,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 163,
            'offset': 34,
            'end_offset': 38,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 164,
            'offset': 31,
            'end_offset': 35,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 166,
            'offset': 14,
            'end_offset': 18,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 167,
            'offset': 0,
            'end_offset': 1,
            'term': 'M9',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/M9/1',
            'link': None,
            'text': 'M9'
        }, {
            'paragraph_index': 168,
            'offset': 0,
            'end_offset': 13,
            'term': 'M9 Media Salts',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'link': 'https://hub.sd2e.org/user/sd2e/design/teknova_M1902/1',
            'text': 'M9 media salts'
        }, {
            'paragraph_index': 185,
            'offset': 71,
            'end_offset': 75,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 186,
            'offset': 37,
            'end_offset': 41,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 193,
            'offset': 15,
            'end_offset': 19,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'Media'
        }, {
            'paragraph_index': 198,
            'offset': 57,
            'end_offset': 66,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 19,
            'end_offset': 28,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 73,
            'end_offset': 82,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 203,
            'offset': 89,
            'end_offset': 98,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 205,
            'offset': 59,
            'end_offset': 68,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 212,
            'offset': 172,
            'end_offset': 176,
            'term': 'Media',
            'uri': 'https://hub.sd2e.org/user/sd2e/design/Media/1',
            'link': None,
            'text': 'media'
        }, {
            'paragraph_index': 218,
            'offset': 0,
            'end_offset': 9,
            'term': 'engineered',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/engineered/1',
            'link': None,
            'text': 'engineered'
        }, {
            'paragraph_index': 229,
            'offset': 27,
            'end_offset': 36,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 273,
            'offset': 17,
            'end_offset': 26,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }, {
            'paragraph_index': 280,
            'offset': 9,
            'end_offset': 18,
            'term': 'proteomics',
            'uri': 'https://hub.sd2e.org/user/sd2e/src/proteomics/1',
            'link': None,
            'text': 'Proteomics'
        }]

        culled_result = self.ips.cull_overlapping(unculled_input)
        # Sort the culled output by position, the same order culled_gt is
        # written in, before comparing the two lists.
        culled_result = sorted(culled_result,
                               key=itemgetter('paragraph_index', 'offset'))
        self.assertTrue(compare_search_results(culled_result, culled_gt))