Example #1
    def load_metadata_from_google_doc(self):
        try:
            google_accessor = GoogleAccessor().get_google_drive_accessor()
            self._metadata = google_accessor.get_document_metadata(document_id=self._document_id)
            return self._metadata
        except Exception:
            raise RequestErrorException(HTTPStatus.NOT_FOUND, errors=['Failed to access document ' + self._document_id])
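This example collapses every accessor failure into a single domain exception carrying HTTP 404 (Examples #4, #9, and #10 use the same pattern). A minimal runnable sketch of the pattern; `FakeAccessor` and this `RequestErrorException` definition are hypothetical stand-ins for the project's own classes:

from http import HTTPStatus

class RequestErrorException(Exception):
    """Hypothetical stand-in for the project's domain exception."""
    def __init__(self, http_status, errors):
        super().__init__(http_status, errors)
        self.http_status = http_status
        self.errors = errors

class FakeAccessor:
    """Hypothetical stand-in for GoogleAccessor's drive accessor."""
    def get_document_metadata(self, document_id):
        raise IOError('simulated network failure')

def load_metadata(accessor, document_id):
    try:
        return accessor.get_document_metadata(document_id=document_id)
    except Exception:
        # Collapse any accessor failure into one domain error, as above.
        raise RequestErrorException(
            HTTPStatus.NOT_FOUND,
            errors=['Failed to access document ' + document_id])

try:
    load_metadata(FakeAccessor(), 'doc-123')
except RequestErrorException as ex:
    print(int(ex.http_status), ex.errors)  # 404 ['Failed to access document doc-123']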
Example #2
    def setUp(self, mock_intent_parser_sbh):
        curr_path = os.path.dirname(os.path.realpath(__file__))
        self.data_dir = os.path.join(curr_path, 'data')
        self.mock_data_dir = os.path.join(self.data_dir, 'mock_data')
        with open(os.path.join(self.data_dir, 'authn.json'), 'r') as file:
            self.authn = json.load(file)['authn']

        self.drive_accessor = GoogleAccessor().get_google_drive_accessor()
        self.maxDiff = None

        self.mock_intent_parser_sbh = mock_intent_parser_sbh
        self.sbol_dictionary = SBOLDictionaryAccessor(
            intent_parser_constants.SD2_SPREADSHEET_ID,
            self.mock_intent_parser_sbh)
        self.sbol_dictionary.initial_fetch()
        datacatalog_config = {
            "mongodb": {
                "database": "catalog_staging",
                "authn": self.authn
            }
        }
        self.intentparser_factory = IntentParserFactory(
            datacatalog_config, self.mock_intent_parser_sbh,
            self.sbol_dictionary)
        self.uploaded_file_id = ''
Example #3
    def __init__(self, spreadsheet_id, sbh):
        self.google_accessor = GoogleAccessor.create()
        self.google_accessor.set_spreadsheet_id(spreadsheet_id)
        self.sbh = sbh

        curr_path = os.path.dirname(os.path.realpath(__file__))
        self.item_map_file = os.path.join(curr_path, 'item-map.json')
Example #4
    def load_metadata_from_google_doc(self):
        try:
            google_accessor = GoogleAccessor.create()
            self._metadata = google_accessor.get_document_metadata(
                document_id=self._document_id)
            return self._metadata
        except Exception:
            raise ConnectionException(
                HTTPStatus.NOT_FOUND,
                'Failed to access document ' + self._document_id)
Example #5
    def __init__(self, spreadsheet_id, sbh):
        self.google_accessor = GoogleAccessor().get_google_spreadsheet_accessor()
        self.sbh = sbh

        self.analyze_terms = {}
        self.analyze_lock = threading.Lock()
        self.spreadsheet_lock = threading.Lock()
        self.spreadsheet_tab_data = {}
        self.spreadsheet_thread = threading.Thread(
            target=self._periodically_fetch_spreadsheet)

        self._spreadsheet_id = spreadsheet_id
        self._tab_headers = dict()
        self._inverse_tab_headers = dict()
        self.MAPPING_FAILURES = 'Mapping Failures'

        self.type_tabs = {
            'Attribute': ['Attribute'],
            'Reagent': [
                'Bead', 'CHEBI', 'Protein', 'Media', 'Stain', 'Buffer',
                'Solution'
            ],
            'Genetic Construct': ['DNA', 'RNA'],
            'Strain': ['Strain'],
            'Protein': ['Protein'],
            'Collections': ['Challenge Problem']
        }
        self._dictionary_headers = [
            'Common Name', 'Type', 'SynBioHub URI', 'Stub Object?',
            'Definition URI', 'Definition URI / CHEBI ID', 'Status'
        ]

        self.mapping_failures_headers = [
            'Experiment/Run', 'Lab', 'Item Name', 'Item ID',
            'Item Type (Strain or Reagent Tab)', 'Status'
        ]

        self.labs = [
            'BioFAB', 'Ginkgo', 'Transcriptic', 'LBNL', 'EmeraldCloud',
            'CalTech', 'PennState (Salis)'
        ]
Example #6
    def setUp(self):
        self.spreadsheet_accessor = GoogleAccessor().get_google_spreadsheet_accessor()
        self.doc_accessor = GoogleAccessor().get_google_doc_accessor()
        self.drive_accessor = GoogleAccessor().get_google_drive_accessor()
        self.app_script_accessor = GoogleAccessor().get_google_app_script_accessor()
Example #7
class GoogleAccessorTest(unittest.TestCase):
    def setUp(self):
        self.spreadsheet_accessor = GoogleAccessor().get_google_spreadsheet_accessor()
        self.doc_accessor = GoogleAccessor().get_google_doc_accessor()
        self.drive_accessor = GoogleAccessor().get_google_drive_accessor()
        self.app_script_accessor = GoogleAccessor().get_google_app_script_accessor()

    def tearDown(self):
        pass

    def test_spreadsheet_deletion(self):
        spreadsheet_id = self.spreadsheet_accessor.create_new_spreadsheet(
            name='Spreadsheet To Delete')
        self.assertTrue(spreadsheet_id)
        self.assertTrue(self.drive_accessor.delete_file(spreadsheet_id))

    def test_create_spreadsheet_from_given_folder(self):
        folder_id = '1693MJT1Up54_aDUp1s3mPH_DRw1_GS5G'
        spreadsheet_id = self.spreadsheet_accessor.create_new_spreadsheet(
            'Spreadsheet To Delete')
        self.drive_accessor.move_file_to_folder(folder_id, spreadsheet_id)
        self.assertTrue(spreadsheet_id)
        self.assertTrue(self.drive_accessor.delete_file(spreadsheet_id))
Example #8
    def setUpClass(self):
        curr_path = os.path.dirname(os.path.realpath(__file__))
        self.data_dir = os.path.join(curr_path, 'data')
        self.mock_data_dir = os.path.join(self.data_dir, 'mock_data')

        cp_request_dir = os.path.join(curr_path, 'data', 'cp-request')
        # git_accessor = git.cmd.Git(cp_request_dir)
        # git_accessor.pull()
        self.structured_request_dir = os.path.join(cp_request_dir, 'input', 'structured_requests')

        with open(os.path.join(self.data_dir, 'authn.json'), 'r') as file:
            self.authn = json.load(file)['authn']

        self.google_accessor = GoogleAccessor.create()
        self.maxDiff = None
Example #9
    def load_from_google_doc(self):
        try:
            doc_accessor = GoogleAccessor().get_google_doc_accessor()
            drive_accessor = GoogleAccessor().get_google_drive_accessor()
            document = doc_accessor.get_document(document_id=self._document_id)
            self._head_revision = drive_accessor.get_head_revision(self._document_id)
            self._links_info = self._get_links_from_doc(document)
            self._paragraphs = self._get_paragraph_from_doc(document)
            self._parents = drive_accessor.get_document_parents(document_id=self._document_id)
            self._tables = self._get_tables_from_doc(document)
            self._title = intent_parser_utils.get_element_type(document, 'title')
            return document
        except Exception as ex:
            self.logger.warning(''.join(traceback.format_exception(etype=type(ex), value=ex, tb=ex.__traceback__)))
            raise RequestErrorException(HTTPStatus.NOT_FOUND, errors=['Failed to access document ' + self._document_id])
Example #10
    def load_from_google_doc(self):
        try:
            google_accessor = GoogleAccessor.create()
            document = google_accessor.get_document(
                document_id=self._document_id)
            self._head_revision = google_accessor.get_head_revision(
                self._document_id)
            self._links_info = self._get_links_from_doc(document)
            self._paragraphs = self._get_paragraph_from_doc(document)
            self._parents = google_accessor.get_document_parents(
                document_id=self._document_id)
            self._tables = intent_parser_utils.get_element_type(
                document, 'table')
            self._title = intent_parser_utils.get_element_type(
                document, 'title')
            return document
        except Exception:
            raise ConnectionException(
                HTTPStatus.NOT_FOUND,
                'Failed to access document ' + self._document_id)
Example #11
    def setUp(self):
        """
        Configure an instance of IntentParserServer for spellcheck testing.
        """
        self.doc_content = None
        with open(os.path.join(self.dataDir, self.spellcheckFile), 'r') as fin:
            self.doc_content = json.loads(fin.read())

        if self.doc_content is None:
            self.fail('Failed to read in test document! Path: ' +
                      os.path.join(self.dataDir, self.spellcheckFile))

        # Clear all dictionary information
        if os.path.exists(IntentParserServer.DICT_PATH):
            for file in os.listdir(IntentParserServer.DICT_PATH):
                os.remove(os.path.join(IntentParserServer.DICT_PATH, file))
            os.rmdir(IntentParserServer.DICT_PATH)

        self.doc_id = '1xMqOx9zZ7h2BIxSdWp2Vwi672iZ30N_2oPs8rwGUoTA'
        self.user = '******'
        self.user_email = '*****@*****.**'
        self.json_body = {
            'documentId': self.doc_id,
            'user': self.user,
            'userEmail': self.user_email
        }

        self.google_accessor = GoogleAccessor.create()
        self.template_spreadsheet_id = '1r3CIyv75vV7A7ghkB0od-TM_16qSYd-byAbQ1DhRgB0'
        self.spreadsheet_id = self.google_accessor.copy_file(
            file_id=self.template_spreadsheet_id,
            new_title='Intent Parser Server Test Sheet')

        self.sbh_collection_uri = 'https://hub-staging.sd2e.org/user/sd2e/src/intent_parser_collection/1'

        curr_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(curr_path, 'sbh_creds.json'), 'r') as file:
            creds = json.load(file)
            self.sbh_username = creds['username']
            self.sbh_password = creds['password']

        self.ips = IntentParserServer(
            bind_port=8081,
            bind_ip='0.0.0.0',
            sbh_collection_uri=self.sbh_collection_uri,
            spreadsheet_id=self.spreadsheet_id,
            sbh_username=self.sbh_username,
            sbh_password=self.sbh_password)
        self.ips.initialize_server()
        self.ips.start(background=True)

        self.ips.client_state_lock = Mock()
        self.ips.client_state_map = {}
        self.ips.google_accessor = Mock()
        self.ips.google_accessor.get_document = Mock(
            return_value=self.doc_content)
        self.ips.send_response = Mock()
        self.ips.get_json_body = Mock(return_value=self.json_body)

        self.ips.process_add_by_spelling([], [])

        # Code to save the GT spelling results, when the test doc has been updated
        #with open(os.path.join(self.dataDir, self.spellcheckResults), 'wb') as fout:
        #    pickle.dump(self.ips.client_state_map[self.doc_id]['spelling_results'], fout)

        self.spelling_gt = None
        with open(os.path.join(self.dataDir, self.spellcheckResults),
                  'rb') as fin:
            self.spelling_gt = pickle.load(fin)

        if self.spelling_gt is None:
            self.fail('Failed to read in spelling results! Path: ' +
                      os.path.join(self.dataDir, self.spellcheckResults))
Example #12
    def setUpClass(self):
        # The Google API appears to create resource warnings when run
        # from unit test similar to the following:
        #
        # site-packages/googleapiclient/_helpers.py:130:
        #  ResourceWarning: unclosed <ssl.SSLSocket fd=6,
        #                            family=AddressFamily.AF_INET6,
        #                            type=SocketKind.SOCK_STREAM,
        #                            proto=6,
        #                            laddr=('192.168.0.1', 49988, 0, 0),
        #                            raddr=('192.168.0.2', 443, 0, 0)>
        #
        # There is some discussion of similar warnings here:
        #
        #  https://github.com/kennethreitz/requests/issues/3912
        #
        # I am just going to ignore these warnings
        #
        warnings.filterwarnings('ignore',
                                message='unclosed <ssl.SSLSocket',
                                category=ResourceWarning)

        # If we don't have the necessary credentials, try reading them in from json
        if not hasattr(IntegrationIpsTest, 'sbh_username') or not hasattr(
                IntegrationIpsTest, 'sbh_password'):
            with open(
                    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                 'sbh_creds.json'), 'r') as fin:
                creds = json.load(fin)
                IntegrationIpsTest.sbh_username = creds['username']
                IntegrationIpsTest.sbh_password = creds['password']

        self.google_accessor = GoogleAccessor.create()

        self.bind_ip = 'localhost'
        self.bind_port = 8081
        self.template_doc_id = '10HqgtfVCtYhk3kxIvQcwljIUonSNlSiLBC8UFmlwm1s'
        self.template_spreadsheet_id = '1r3CIyv75vV7A7ghkB0od-TM_16qSYd-byAbQ1DhRgB0'

        self.template_doc_last_rev = '2019-01-30T17:45:49.339Z'
        self.template_sheet_last_rev = '2019-06-12T20:29:13.519Z'

        rev_results = self.google_accessor.get_document_revisions(
            document_id=self.template_doc_id)
        if not 'drive#revisionList' == rev_results['kind'] or len(
                rev_results['items']) < 1:
            print('ERROR: Failed to retrieve revisions for document template!')
            raise Exception
        last_rev = rev_results['items'][0]['modifiedDate']
        if not last_rev == self.template_doc_last_rev:
            print(
                'ERROR: template document has been modified! Expected last revision: %s, received %s!'
                % (self.template_doc_last_rev, last_rev))
            raise Exception

        rev_results = self.google_accessor.get_document_revisions(
            document_id=self.template_spreadsheet_id)
        if not 'drive#revisionList' == rev_results['kind'] or len(
                rev_results['items']) < 1:
            print(
                'ERROR: Failed to retrieve revisions for spreadsheet template!'
            )
            raise Exception
        last_rev = rev_results['items'][0]['modifiedDate']
        if not last_rev == self.template_sheet_last_rev:
            print(
                'ERROR: template spreadsheet has been modified! Expected last revision: %s, received %s!'
                % (self.template_sheet_last_rev, last_rev))
            raise Exception

        self.server_url = 'http://' + self.bind_ip + ':' + str(self.bind_port)

        self.doc_id = self.google_accessor.copy_file(
            file_id=self.template_doc_id,
            new_title='Intent Parser Server Test Doc')

        self.spreadsheet_id = self.google_accessor.copy_file(
            file_id=self.template_spreadsheet_id,
            new_title='Intent Parser Server Test Sheet')

        self.doc = self.google_accessor.get_document(document_id=self.doc_id)

        sbh_collection_uri = 'https://hub-staging.sd2e.org/user/sd2e/' + \
            'src/intent_parser_collection/1'

        self.intent_parser = IntentParserServer(
            sbh_collection_uri=sbh_collection_uri,
            sbh_username=IntegrationIpsTest.sbh_username,
            sbh_password=IntegrationIpsTest.sbh_password,
            spreadsheet_id=self.spreadsheet_id,
            item_map_cache=False,
            bind_ip='localhost',
            bind_port=8081)
        self.intent_parser.initialize_server()
        self.intent_parser.start(background=True)

        self.maxDiff = None
Example #13
def perform_automatic_run(current_release,
                          drive_id='1FYOFBaUDIS-lBn0fr76pFFLBbMeD25b3'):
    drive_access = GoogleAccessor().get_google_drive_accessor(version=3)
    app_script_access = GoogleAccessor().get_google_app_script_accessor()

    local_docs = util.load_json_file(ADDON_FILE)
    remote_docs = drive_access.get_all_docs(drive_id)
    while len(remote_docs) > 0:
        doc = remote_docs.pop(0)
        r_id = doc
        logger.info('Processing doc: ' + r_id)
        if r_id in local_docs:
            try:
                metadata = local_docs[r_id]
                if metadata['releaseVersion'] != current_release:
                    logger.info(
                        'Updating script project metadata for doc: %s' % r_id)
                    script_id = metadata['scriptId']

                    remote_metadata = app_script_access.get_project_metadata(
                        script_id)
                    app_script_access.update_project_metadata(
                        script_id, remote_metadata,
                        INTENT_PARSER_ADDON_CODE_FILE,
                        INTENT_PARSER_MANIFEST_FILE)

                    new_version = app_script_access.get_head_version(
                        script_id) + 1
                    publish_message = current_release + ' Release'
                    app_script_access.create_version(script_id, new_version,
                                                     publish_message)

                    local_docs[r_id] = {
                        'scriptId': script_id,
                        'releaseVersion': current_release
                    }
                    util.write_json_to_file(local_docs, ADDON_FILE)
            except errors.HttpError:
                logger.info('Reached update quota limit!')
                remote_docs.append(doc)
                time.sleep(60)
        else:
            try:
                logger.info('Creating add-on for doc: %s' % r_id)
                script_proj_title = 'IPProject Release'
                response = app_script_access.create_project(
                    script_proj_title, r_id)
                script_id = response['scriptId']

                remote_metadata = app_script_access.get_project_metadata(
                    script_id)
                app_script_access.set_project_metadata(
                    script_id, remote_metadata, USER_ACCOUNT,
                    INTENT_PARSER_ADDON_CODE_FILE, INTENT_PARSER_MANIFEST_FILE,
                    'Code')

                local_docs[r_id] = {
                    'scriptId': script_id,
                    'releaseVersion': current_release
                }
                util.write_json_to_file(local_docs, ADDON_FILE)
            except errors.HttpError:
                logger.info('Reached create quota limit!')
                remote_docs.append(doc)
                time.sleep(60)
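`perform_automatic_run` recovers from Apps Script quota errors by pushing the failed doc back onto the work queue and sleeping sixty seconds before continuing. A stripped-down, runnable sketch of that requeue-and-wait loop; `QuotaError` stands in for `googleapiclient.errors.HttpError`, and `flaky_process` simulates a single quota hit:

import time
from collections import deque

class QuotaError(Exception):
    """Stand-in for the HttpError raised on quota exhaustion."""

def drain_with_retry(docs, process, wait_seconds=60):
    """Process every doc, requeueing any that hit the quota limit."""
    queue = deque(docs)
    while queue:
        doc = queue.popleft()
        try:
            process(doc)
        except QuotaError:
            # Same recovery as perform_automatic_run: put the doc back
            # and wait for the quota window to reset.
            queue.append(doc)
            time.sleep(wait_seconds)

calls = {'n': 0}
def flaky_process(doc):
    calls['n'] += 1
    if calls['n'] == 1:
        raise QuotaError()  # first call simulates hitting the quota
    print('processed', doc)

drain_with_retry(['doc-a', 'doc-b'], flaky_process, wait_seconds=0)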
Example #14
class SBOLDictionaryAccessor(object):
    """
    Provide functionalities to read and write information to the SBOL Dictionary Maintainer Google Spreadsheet.
    """

    logger = logging.getLogger('intent_parser_sbol_dictionary')

    # Some lab UIDs are short but still valid.  This defines an exceptions to the length threshold.
    UID_LENGTH_EXCEPTION = ['M9', 'LB']

    # Determine how long a lab UID string has to be in order to be added to the item map.
    # Strings below this size are ignored.
    UID_LENGTH_THRESHOLD = 3

    curr_path = os.path.dirname(os.path.realpath(__file__))
    ITEM_MAP_FILE = os.path.join(curr_path, 'item-map.json')

    ANALYZE_TABS = [
        dictionary_constants.TAB_ATTRIBUTE,
        dictionary_constants.TAB_GENETIC_CONSTRUCTS,
        dictionary_constants.TAB_PROTEIN, dictionary_constants.TAB_REAGENT,
        dictionary_constants.TAB_STRAIN
    ]

    SYNC_PERIOD = timedelta(minutes=30)

    def __init__(self, spreadsheet_id, sbh):
        self.google_accessor = GoogleAccessor().get_google_spreadsheet_accessor()
        self.sbh = sbh

        self.analyze_terms = {}
        self.analyze_lock = threading.Lock()
        self.spreadsheet_lock = threading.Lock()
        self.spreadsheet_tab_data = {}
        self.spreadsheet_thread = threading.Thread(
            target=self._periodically_fetch_spreadsheet)

        self._spreadsheet_id = spreadsheet_id
        self._tab_headers = dict()
        self._inverse_tab_headers = dict()
        self.MAPPING_FAILURES = 'Mapping Failures'

        self.type_tabs = {
            'Attribute': ['Attribute'],
            'Reagent': [
                'Bead', 'CHEBI', 'Protein', 'Media', 'Stain', 'Buffer',
                'Solution'
            ],
            'Genetic Construct': ['DNA', 'RNA'],
            'Strain': ['Strain'],
            'Protein': ['Protein'],
            'Collections': ['Challenge Problem']
        }
        self._dictionary_headers = [
            'Common Name', 'Type', 'SynBioHub URI', 'Stub Object?',
            'Definition URI', 'Definition URI / CHEBI ID', 'Status'
        ]

        self.mapping_failures_headers = [
            'Experiment/Run', 'Lab', 'Item Name', 'Item ID',
            'Item Type (Strain or Reagent Tab)', 'Status'
        ]

        self.labs = [
            'BioFAB', 'Ginkgo', 'Transcriptic', 'LBNL', 'EmeraldCloud',
            'CalTech', 'PennState (Salis)'
        ]

    def initial_fetch(self):
        self._fetch_spreadsheet_data()

    def get_spreadsheet_data(self):
        self.spreadsheet_lock.acquire()
        sheet_data = self.spreadsheet_tab_data.copy()
        self.spreadsheet_lock.release()
        return sheet_data

    def get_analyzed_terms(self):
        """
        Retrieve terms from the dictionary with its corresponding SBH URI.
        Returns:
            A dictionary where key represents a dictionary term and value represents a SBH uri.
        """
        self.analyze_lock.acquire()
        dictionary_terms = self.analyze_terms.copy()
        self.analyze_lock.release()
        return dictionary_terms

    def get_tab_name_from_item_type(self, targeted_item_type):
        result = None
        for tab_name, item_types in self.type_tabs.items():
            if targeted_item_type in item_types:
                result = tab_name

        if result is None:
            raise DictionaryMaintainerException(
                'Unable to locate tab name in SBOL Dictionary for item type: %s'
                % targeted_item_type)
        return result

    def start_synchronizing_spreadsheet(self):
        self._fetch_spreadsheet_data()
        self.spreadsheet_thread.start()

    def stop_synchronizing_spreadsheet(self):
        self.spreadsheet_thread.join()
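        # NOTE: _periodically_fetch_spreadsheet loops forever with no exit
        # condition, so this join() blocks indefinitely; a stoppable variant
        # would signal the loop (e.g. with a threading.Event) before joining.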

    def _periodically_fetch_spreadsheet(self):
        while True:
            time.sleep(self.SYNC_PERIOD.total_seconds())
            self._fetch_spreadsheet_data()

    def _fetch_spreadsheet_data(self):
        self.logger.info('Fetching SBOL Dictionary spreadsheet')

        self.spreadsheet_lock.acquire()
        self._fetch_tabs()
        self.spreadsheet_lock.release()

        self.analyze_lock.acquire()
        self._fetch_analyze_terms()
        self.analyze_lock.release()

    def _fetch_tabs(self):
        spreadsheet_tabs = self.type_tabs.keys()
        update_spreadsheet_data = {}
        try:
            for tab in spreadsheet_tabs:
                update_spreadsheet_data[tab] = self.get_row_data(tab=tab)
                self.logger.info('Fetched data from tab ' + tab)
            self.spreadsheet_tab_data = update_spreadsheet_data
        except errors.HttpError:
            self.logger.info('Reached spreadsheet fetch quota limit!')

    def _fetch_analyze_terms(self):
        dictionary_terms = {}
        try:
            for tab in self.ANALYZE_TABS:
                dictionary_terms.update(
                    self._get_dictionary_terms_from_tab(tab))
            self.analyze_terms = dictionary_terms
        except errors.HttpError:
            self.logger.info('Reached spreadsheet fetch quota limit!')

    def _get_dictionary_terms_from_tab(self, tab):
        dictionary_terms = {}
        tab_data = self.get_row_data(tab=tab)
        for common_name, strain in self._create_strain_intents_from_spreadsheet_tab(
                tab_data).items():
            for name in strain.get_lab_strain_names():
                if len(name) > 2:
                    dictionary_terms[name] = strain.get_strain_reference_link()
            if len(common_name) > 2:
                dictionary_terms[strain.get_strain_common_name()] = strain.get_strain_reference_link()
        return dictionary_terms

    def create_dictionary_entry(self, data, document_url, item_definition_uri):
        item_type = data['itemType']
        item_name = data['commonName']
        item_lab_ids = data['labId']
        item_lab_id_tag = data['labIdSelect']

        item_uri = document_url
        type2tab = self.load_type2tab()
        tab_name = type2tab[item_type]

        try:
            tab_data = self.get_row_data(tab=tab_name)
        except Exception:
            raise Exception('Failed to access dictionary spreadsheet')

        # Get common names
        item_map = {}
        for row_data in tab_data:
            common_name = row_data['Common Name']
            if common_name is None or len(common_name) == 0:
                continue
            item_map[common_name] = row_data

        if item_name in item_map:
            raise Exception('"' + item_name +
                            '" already exists in dictionary spreadsheet')

        dictionary_entry = {
            'tab': tab_name,
            'row': len(tab_data) + 3,
            'Common Name': item_name,
            'Type': item_type
        }
        if tab_name == 'Reagent':
            dictionary_entry['Definition URI / CHEBI ID'] = item_definition_uri
        else:
            dictionary_entry['Definition URI'] = item_definition_uri

        if item_type != 'Attribute':
            dictionary_entry['Stub Object?'] = 'YES'

        dictionary_entry[item_lab_id_tag] = item_lab_ids
        dictionary_entry['SynBioHub URI'] = item_uri

        try:
            self.set_row_data(dictionary_entry)
        except Exception:
            raise Exception(
                'Failed to add entry to the dictionary spreadsheet')

    def get_tab_sheet(self, tab_name):
        """Retrieve contents from a spreadsheet tab.
        Args:
            tab_name: name of tab.
        Returns:
            A spreadsheet tab.
        Raises:
            DictionaryMaintainerException to indicate if a tab does not exist within a spreadsheet.
        """
        self.spreadsheet_lock.acquire()
        sheet_data = self.spreadsheet_tab_data.copy()
        self.spreadsheet_lock.release()
        target_tab = None
        for tab in sheet_data:
            if tab == tab_name:
                target_tab = sheet_data[tab]
                break
        if target_tab is None:
            raise DictionaryMaintainerException(
                'Unable to locate %s tab in spreadsheet.' % tab_name)
        return target_tab

    def load_type2tab(self):
        # Inverse map of typeTabs
        type2tab = {}
        for tab_name in self.type_tabs.keys():
            for type_name in self.type_tabs[tab_name]:
                type2tab[type_name] = tab_name
        return type2tab

    def add_sheet_request(self, sheet_title):
        """ Creates a Google request to add a tab to the current spreadsheet

        Args:
            sheet_title: name of the new tab
        """

        request = {'addSheet': {'properties': {'title': sheet_title}}}
        return request

    def create_dictionary_sheets(self):
        """ Creates the standard tabs on the current spreadsheet.
            The tabs are not populated with any data
        """
        add_sheet_requests = list(
            map(lambda x: self.add_sheet_request(x),
                list(self.type_tabs.keys())))
        # Mapping Failures tab
        add_sheet_requests.append(self.add_sheet_request(
            self.MAPPING_FAILURES))
        self.google_accessor.execute_requests(add_sheet_requests)

        # Add sheet column headers
        headers = self._dictionary_headers
        headers += list(map(lambda x: x + ' UID', self.labs))

        for tab in self.type_tabs.keys():
            self._set_tab_data(tab + '!2:2', [headers])

        self._set_tab_data(self.MAPPING_FAILURES + '!2:2',
                           [self.mapping_failures_headers])

    def _cache_tab_headers(self, tab):
        """
        Cache the headers (and locations) in a tab
        returns a map that maps headers to column indexes
        """
        tab_data = self.google_accessor.get_tab_data(tab + "!2:2",
                                                     self._spreadsheet_id)

        if 'values' not in tab_data:
            raise Exception('No header values found in tab "' + tab + '"')

        header_values = tab_data['values'][0]
        header_map = {}
        for index in range(len(header_values)):
            header_map[header_values[index]] = index

        inverse_header_map = {}
        for key in header_map.keys():
            inverse_header_map[header_map[key]] = key

        self._tab_headers[tab] = header_map
        self._inverse_tab_headers[tab] = inverse_header_map

    def _clear_tab_header_cache(self):
        self._tab_headers.clear()
        self._inverse_tab_headers.clear()

    def get_tab_headers(self, tab):
        """
        Get the headers (and locations) in a tab
        returns a map that maps headers to column indexes
        """
        if tab not in self._tab_headers.keys():
            self._cache_tab_headers(tab)

        return self._tab_headers[tab]

    def _get_tab_inverse_headers(self, tab):
        """
        Get the headers (and locations) in a tab
        returns a map that maps column indexes to headers
        """
        if tab not in self._inverse_tab_headers.keys():
            self._cache_tab_headers(tab)

        return self._inverse_tab_headers[tab]

    def get_row_data(self, tab, row=None):
        """
        Retrieve data in a tab.  Returns a list of maps, where each list
        element maps a header name to the corresponding row value.  If
        no row is specified all rows are returned
        """
        if tab not in self._tab_headers.keys():
            self._cache_tab_headers(tab)

        header_value = self._inverse_tab_headers[tab]

        if row is None:
            value_range = tab + '!3:9999'
        else:
            value_range = tab + '!' + str(row) + ":" + str(row)

        tab_data = self.google_accessor.get_tab_data(value_range,
                                                     self._spreadsheet_id)

        row_data = []
        if 'values' not in tab_data:
            return row_data

        values = tab_data['values']
        row_index = 3
        for row_values in values:
            this_row_data = {}
            for i in range(len(header_value)):
                if i >= len(row_values):
                    break
                header = header_value[i]
                value = row_values[i]

                if value is not None:
                    this_row_data[header] = value

            if len(this_row_data) > 0:
                this_row_data['row'] = row_index
                this_row_data['tab'] = tab
                row_data.append(this_row_data)
            row_index += 1
        return row_data

    def set_row_data(self, entry):
        """
        Write a row to the spreadsheet.  The entry is a map that maps
        column headers to the corresponding values, with an additional
        set of keys that specify the tab and the spreadsheet row
        """
        tab = entry['tab']
        row = entry['row']
        row_data = self.gen_row_data(entry=entry, tab=tab)
        row_range = '{}!{}:{}'.format(tab, row, row)
        self.google_accessor.set_tab_data(row_range, [row_data],
                                          self._spreadsheet_id)

    def set_row_value(self, entry, column):
        """
        Write a single cell value, given an entry, and the column name
        of the entry to be written
        """
        return self.set_cell_value(tab=entry['tab'],
                                   row=entry['row'],
                                   column=column,
                                   value=entry[column])

    def set_cell_value(self, tab, row, column, value):
        """
        Write a single cell value, given an tab, row, column name, and value.
        """
        headers = self.get_tab_headers(tab)
        if column not in headers:
            raise Exception('No column "{}" on tab "{}"'.format(column, tab))

        col = chr(ord('A') + headers[column])
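        # NOTE: single-letter addressing covers columns A-Z only; a tab with
        # more than 26 columns would need the base-26 'AA', 'AB', ... scheme.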
        row_range = tab + '!' + col + str(row)
        self.google_accessor.set_tab_data(row_range, [[value]],
                                          self._spreadsheet_id)

    def gen_row_data(self, entry, tab):
        """
        Generate a list of spreadsheet row value given a map the maps
        column headers to values
        """
        headers = self._get_tab_inverse_headers(tab)
        row_data = [''] * (max(headers.keys()) + 1)

        for index in headers.keys():
            header = headers[index]
            if header not in entry:
                continue
            row_data[index] = entry[header]

        return row_data

    def map_common_names_and_tacc_id(self):
        result = {}
        attribute_tab = self.get_tab_sheet(dictionary_constants.TAB_ATTRIBUTE)
        for row in attribute_tab:
            if dictionary_constants.COLUMN_COMMON_NAME in row and dictionary_constants.COLUMN_TACC_UID in row:
                common_name = row[dictionary_constants.COLUMN_COMMON_NAME]
                tacc_id = row[dictionary_constants.COLUMN_TACC_UID]
                if tacc_id:
                    result[tacc_id] = common_name
        return result

    def get_mapped_strain(self, lab_name):
        """Create a mapping for strains from the Strains tab.
        Args:
            lab_name: A string to represent the name of a Lab.

        Returns:
            A Dict of StrainIntent objects. The key represents the sbh uri.
            The value is a StrainIntent object
        """
        mapped_strains = {}
        if lab_name not in dictionary_constants.MAPPED_LAB_UID:
            message = 'Unable to map %s to a LAB_UID in the SBOL Dictionary for processing strains.' % lab_name
            raise DictionaryMaintainerException(message)
        lab_uid = dictionary_constants.MAPPED_LAB_UID[lab_name]

        strain_tab = self.get_tab_sheet(dictionary_constants.TAB_STRAIN)
        for row in strain_tab:
            if (dictionary_constants.COLUMN_COMMON_NAME in row
                    and dictionary_constants.COLUMN_SYNBIOHUB_URI in row
                    and lab_uid in row):
                sbh_uri = row[dictionary_constants.COLUMN_SYNBIOHUB_URI]
                common_name = row[dictionary_constants.COLUMN_COMMON_NAME]
                lab_strain_names = []
                if row[lab_uid]:
                    lab_strain_names = [
                        name for name in cell_parser.PARSER.extract_name_value(
                            row[lab_uid])
                    ]
                mapped_strains[sbh_uri] = SBOLDictionaryStrainIntent(
                    sbh_uri,
                    lab_name,
                    common_name,
                    lab_strain_names=lab_strain_names)
        return mapped_strains

    def _create_strain_intents_from_spreadsheet_tab(self, tab):
        strain_intents = {}
        for row in tab:
            if dictionary_constants.COLUMN_COMMON_NAME in row and dictionary_constants.COLUMN_SYNBIOHUB_URI in row:
                sbh_uri = row[dictionary_constants.COLUMN_SYNBIOHUB_URI]
                common_name = row[dictionary_constants.COLUMN_COMMON_NAME]
                for lab_name, lab_uid in dictionary_constants.MAPPED_LAB_UID.items():
                    if lab_uid and lab_uid in row:
                        if row[lab_uid]:
                            lab_strain_names = [
                                name for name in cell_parser.PARSER.extract_name_value(row[lab_uid])
                            ]
                            strain_intents[common_name] = SBOLDictionaryStrainIntent(
                                sbh_uri,
                                lab_name,
                                common_name,
                                lab_strain_names=lab_strain_names)
                        else:
                            strain_intents[common_name] = SBOLDictionaryStrainIntent(
                                sbh_uri, lab_name, common_name)
        return strain_intents

    def get_common_name_from_transcriptic_id(self, transcriptic_id):
        mappings = self.map_common_names_and_transcriptic_id()
        for key, value in mappings.items():
            if transcriptic_id == value:
                return key
        return None

    def map_common_names_and_transcriptic_id(self):
        result = {}
        attribute_tab = self.get_tab_sheet(dictionary_constants.TAB_ATTRIBUTE)
        for row in attribute_tab:
            if dictionary_constants.COLUMN_COMMON_NAME in row and dictionary_constants.COLUMN_TRANSCRIPT_UID in row:
                common_name = row[dictionary_constants.COLUMN_COMMON_NAME]
                strateos_id = row[dictionary_constants.COLUMN_TRANSCRIPT_UID]
                if strateos_id:
                    result[common_name] = strateos_id
        return result
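A note on the two halves of the row machinery above: `get_row_data` turns each sheet row into a `{header: value}` dict via the cached header map, and `gen_row_data` inverts that, placing dict values back into a positional row. A self-contained sketch of the round trip, with plain lists standing in for the Sheets API's `values` payload:

HEADERS = ['Common Name', 'Type', 'SynBioHub URI']
HEADER_MAP = {name: index for index, name in enumerate(HEADERS)}   # header -> column
INVERSE_MAP = {index: name for name, index in HEADER_MAP.items()}  # column -> header

def rows_to_entries(values, tab, first_row=3):
    """Mirror of get_row_data: map each row's cells to its header names."""
    entries = []
    for offset, row_values in enumerate(values):
        entry = {INVERSE_MAP[i]: value
                 for i, value in enumerate(row_values) if i in INVERSE_MAP}
        if entry:
            entry['row'] = first_row + offset
            entry['tab'] = tab
            entries.append(entry)
    return entries

def entry_to_row(entry):
    """Mirror of gen_row_data: place each known header's value at its column."""
    row = [''] * len(HEADERS)
    for index, header in INVERSE_MAP.items():
        if header in entry:
            row[index] = entry[header]
    return row

entries = rows_to_entries([['arabinose', 'CHEBI']], tab='Reagent')
print(entries)  # [{'Common Name': 'arabinose', 'Type': 'CHEBI', 'row': 3, 'tab': 'Reagent'}]
print(entry_to_row(entries[0]))  # ['arabinose', 'CHEBI', '']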
Example #15
    def setUpClass(self):
        self.google_accessor = GoogleAccessor.create()
Example #16
    def setUp(self):
        """
        Configure an instance of IntentParserServer for generation testing.
        """
        self.doc_content = None
        with open(os.path.join(self.dataDir, self.spellcheckFile), 'r') as fin:
            self.doc_content = json.loads(fin.read())

        if self.doc_content is None:
            self.fail('Failed to read in test document! Path: ' +
                      os.path.join(self.dataDir, self.spellcheckFile))

        # Clear all dictionary information
        if os.path.exists(IntentParserServer.DICT_PATH):
            for file in os.listdir(IntentParserServer.DICT_PATH):
                os.remove(os.path.join(IntentParserServer.DICT_PATH, file))
            os.rmdir(IntentParserServer.DICT_PATH)

        with open(os.path.join(self.dataDir, self.authn_file), 'r') as fin:
            self.authn = json.loads(fin.read())['authn']

        self.doc_id = '1xMqOx9zZ7h2BIxSdWp2Vwi672iZ30N_2oPs8rwGUoTA'
        self.user = '******'
        self.user_email = '*****@*****.**'
        self.json_body = {
            'documentId': self.doc_id,
            'user': self.user,
            'userEmail': self.user_email
        }

        self.google_accessor = GoogleAccessor.create()
        self.template_spreadsheet_id = '1r3CIyv75vV7A7ghkB0od-TM_16qSYd-byAbQ1DhRgB0'
        self.spreadsheet_id = self.google_accessor.copy_file(
            file_id=self.template_spreadsheet_id,
            new_title='Intent Parser Server Test Sheet')

        self.sbh_collection_uri = 'https://hub-staging.sd2e.org/user/sd2e/src/intent_parser_collection/1'

        curr_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(curr_path, 'sbh_creds.json'), 'r') as file:
            creds = json.load(file)
            self.sbh_username = creds['username']
            self.sbh_password = creds['password']

        self.ips = IntentParserServer(
            bind_port=8081,
            bind_ip='0.0.0.0',
            sbh_collection_uri=self.sbh_collection_uri,
            spreadsheet_id=self.spreadsheet_id,
            sbh_username=self.sbh_username,
            sbh_password=self.sbh_password)
        self.ips.initialize_server()
        self.ips.start(background=True)

        self.ips.client_state_lock = Mock()
        self.ips.client_state_map = {}
        self.ips.google_accessor = Mock()
        self.ips.google_accessor.get_document = Mock(
            return_value=self.doc_content)
        self.ips.send_response = Mock()
        self.ips.get_json_body = Mock(return_value=self.json_body)
        self.ips.analyze_processing_map = {}
        self.ips.analyze_processing_map_lock = Mock()
        self.ips.analyze_processing_lock = Mock()

        # Load example measurement table JSON data.  Contains 9 tables, 2 of which are measurement tables.
        with open(os.path.join(self.dataDir, self.tablesFile), 'r') as fin:
            self.table_data = json.loads(fin.read())

        self.ips.item_map_lock = Mock()
        with open(os.path.join(self.dataDir, self.items_json), 'r') as fin:
            self.ips.item_map = json.load(fin)

        self.httpMessage = Mock()
        self.httpMessage.get_resource = Mock(
            return_value='/document_report?1xMqOx9zZ7h2BIxSdWp2Vwi672iZ30N_2oPs8rwGUoTA')

        self.ips.google_accessor.get_document_parents = Mock(
            return_value=self.parent_list)
        self.ips.google_accessor.get_document_metadata = Mock(
            return_value=self.parent_meta)
        self.maxDiff = None
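Both server fixtures above (Examples #11 and #16) cut the network out of the tests by replacing `ips.google_accessor` with a `Mock` whose `get_document` returns canned JSON. A minimal sketch of that seam, with `DocumentServer` as a hypothetical stand-in for `IntentParserServer`:

from unittest.mock import Mock

class DocumentServer:
    """Hypothetical stand-in for IntentParserServer's accessor seam."""
    def __init__(self, google_accessor):
        self.google_accessor = google_accessor

    def document_title(self, document_id):
        document = self.google_accessor.get_document(document_id=document_id)
        return document['title']

canned_doc = {'title': 'CP Experimental Request'}
accessor = Mock()
accessor.get_document = Mock(return_value=canned_doc)  # same stubbing as the tests above

server = DocumentServer(accessor)
print(server.document_title('doc-123'))  # CP Experimental Request
accessor.get_document.assert_called_once_with(document_id='doc-123')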
Example #17
class TableCreator(object):
    def __init__(self):
        self.doc_accessor = GoogleAccessor().get_google_doc_accessor()
        self.ip_table_factory = IntentParserTableFactory()

    def create_lab_table_from_intent(self, lab_intent: LabIntent):
        lab_table = [[
            '%s: %s' %
            (ip_constants.HEADER_LAB_VALUE, lab_intent.get_lab_name())
        ]]
        return lab_table

    def create_parameter_table_from_intent(self, parameter_intent):
        parameter_table = [[
            ip_constants.HEADER_PARAMETER_VALUE,
            ip_constants.HEADER_PARAMETER_VALUE_VALUE
        ]]

        protocol_name = parameter_intent.get_protocol_name() if parameter_intent.get_protocol_name() else ' '
        parameter_table.append(
            [ip_constants.PARAMETER_PROTOCOL_NAME, protocol_name])
        self._add_run_parameters(parameter_intent, parameter_table)
        for name, value in parameter_intent.get_default_parameters().items():
            parameter_table.append([name, str(value)])
        return parameter_table

    def _add_run_parameters(self, parameter_intent, parameter_table):
        parameter_value = parameter_intent.get_xplan_base_dir() if parameter_intent.get_xplan_base_dir() else ''
        parameter_table.append([
            ip_constants.PROTOCOL_FIELD_XPLAN_BASE_DIRECTORY, parameter_value
        ])

        parameter_value = parameter_intent.get_xplan_reactor() if parameter_intent.get_xplan_reactor() else 'xplan'
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_XPLAN_REACTOR, parameter_value])

        parameter_value = parameter_intent.get_plate_size() if parameter_intent.get_plate_size() else ''
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_PLATE_SIZE, parameter_value])

        parameter_value = parameter_intent.get_plate_number() if parameter_intent.get_plate_number() else ''
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_PLATE_NUMBER, parameter_value])

        parameter_value = parameter_intent.get_container_search_string() if parameter_intent.get_container_search_string() else ' '
        parameter_table.append([
            ip_constants.PROTOCOL_FIELD_CONTAINER_SEARCH_STRING,
            parameter_value
        ])

        parameter_value = parameter_intent.get_strain_property() if parameter_intent.get_strain_property() else ''
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_STRAIN_PROPERTY, parameter_value])

        parameter_value = parameter_intent.get_xplan_path() if parameter_intent.get_xplan_path() else ''
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_XPLAN_PATH, parameter_value])

        parameter_value = parameter_intent.get_submit_flag() if parameter_intent.get_submit_flag() else 'True'
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_SUBMIT, parameter_value])

        parameter_value = parameter_intent.get_protocol_id() if parameter_intent.get_protocol_id() else ''
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_PROTOCOL_ID, parameter_value])

        parameter_value = parameter_intent.get_test_mode() if parameter_intent.get_test_mode() else 'False'
        parameter_table.append(
            [ip_constants.PROTOCOL_FIELD_TEST_MODE, parameter_value])

        parameter_value = parameter_intent.get_experiment_ref_url() if parameter_intent.get_experiment_ref_url() else ''
        parameter_table.append([
            ip_constants.PROTOCOL_FIELD_EXPERIMENT_REFERENCE_URL_FOR_XPLAN,
            parameter_value
        ])

    def create_measurement_table_from_intents(self, measurement_intents):
        measurement_headers = self._create_measurement_table_header(
            measurement_intents)
        measurement_data = self._add_measurement_data(measurement_intents,
                                                      measurement_headers)
        measurement_table = [list(measurement_headers.keys())
                             ] + measurement_data
        return measurement_table

    def _add_measurement_data(self, measurement_intents, measurement_headers):
        measurement_data = []
        for measurement_intent in measurement_intents:
            row_data = [' '] * len(measurement_headers)
            if ip_constants.HEADER_MEASUREMENT_TYPE_VALUE in measurement_headers:
                measurement_type = measurement_intent.get_measurement_type() if measurement_intent.has_measurement_type() else ' '
                index = measurement_headers[
                    ip_constants.HEADER_MEASUREMENT_TYPE_VALUE]
                row_data[index] = measurement_type
            if ip_constants.HEADER_STRAINS_VALUE in measurement_headers:
                if measurement_intent.size_of_strains() > 0:
                    strain_values = ', '.join([
                        strain.get_name().get_name()
                        for strain in measurement_intent.get_strains()
                    ])
                    index = measurement_headers[
                        ip_constants.HEADER_STRAINS_VALUE]
                    row_data[index] = strain_values
            if ip_constants.HEADER_TEMPERATURE_VALUE in measurement_headers:
                if measurement_intent.size_of_temperatures() > 0:
                    temperature_values = ', '.join([
                        '%s %s' % (str(temperature.get_value()),
                                   str(temperature.get_unit())) for temperature
                        in measurement_intent.get_temperatures()
                    ])
                    index = measurement_headers[
                        ip_constants.HEADER_TEMPERATURE_VALUE]
                    row_data[index] = temperature_values
            if ip_constants.HEADER_TIMEPOINT_VALUE in measurement_headers:
                if measurement_intent.size_of_timepoints() > 0:
                    timepoint_values = ', '.join([
                        '%s %s' %
                        (str(timepoint.get_value()), str(timepoint.get_unit()))
                        for timepoint in measurement_intent.get_timepoints()
                    ])
                    index = measurement_headers[
                        ip_constants.HEADER_TIMEPOINT_VALUE]
                    row_data[index] = timepoint_values
            if not measurement_intent.contents_is_empty():
                for content_intent in measurement_intent.get_contents().get_contents():
                    if content_intent.size_of_reagents() > 0:
                        for reagent in content_intent.get_reagents():
                            reagent_name = reagent.get_reagent_name().get_name()
                            if reagent_name in measurement_headers and len(
                                    reagent.get_reagent_values()) > 0:
                                reagent_values = []
                                for reagent_value in reagent.get_reagent_values():
                                    if reagent_value.get_unit():
                                        reagent_values.append(
                                            '%s %s' %
                                            (str(reagent_value.get_value()),
                                             str(reagent_value.get_unit())))
                                    else:
                                        reagent_values.append(
                                            '%s' % str(reagent_value.get_value()))
                                index = measurement_headers[reagent_name]
                                row_data[index] = ', '.join(reagent_values)
                    elif content_intent.size_of_medias() > 0:
                        for media in content_intent.get_medias():
                            media_name = media.get_media_name().get_name()
                            if media_name in measurement_headers and len(
                                    media.get_media_values()) > 0:
                                media_values = ', '.join([
                                    '%s' % media_value.get_name() for
                                    media_value in media.get_media_values()
                                ])
                                index = measurement_headers[media_name]
                                row_data[index] = media_values
            measurement_data.append(row_data)
        return measurement_data

    def _create_measurement_table_header(self, measurement_intents):
        header_indices = {}
        for measurement_intent in measurement_intents:
            if measurement_intent.has_measurement_type() and ip_constants.HEADER_MEASUREMENT_TYPE_VALUE not in header_indices:
                header_indices[ip_constants.HEADER_MEASUREMENT_TYPE_VALUE] = len(header_indices)
            if measurement_intent.size_of_strains() > 0 and ip_constants.HEADER_STRAINS_VALUE not in header_indices:
                header_indices[ip_constants.HEADER_STRAINS_VALUE] = len(header_indices)
            if measurement_intent.size_of_temperatures() > 0 and ip_constants.HEADER_TEMPERATURE_VALUE not in header_indices:
                header_indices[ip_constants.HEADER_TEMPERATURE_VALUE] = len(header_indices)
            if measurement_intent.size_of_timepoints() > 0 and ip_constants.HEADER_TIMEPOINT_VALUE not in header_indices:
                header_indices[ip_constants.HEADER_TIMEPOINT_VALUE] = len(header_indices)
            if not measurement_intent.contents_is_empty():
                for content_intent in measurement_intent.get_contents().get_contents():
                    if content_intent.size_of_reagents() > 0:
                        for reagent in content_intent.get_reagents():
                            reagent_name = reagent.get_reagent_name().get_name()
                            if reagent_name not in header_indices:
                                header_indices[reagent_name] = len(header_indices)
                    elif content_intent.size_of_medias() > 0:
                        for media in content_intent.get_medias():
                            media_name = media.get_media_name().get_name()
                            if media_name not in header_indices:
                                header_indices[media_name] = len(header_indices)
        return header_indices

    def create_experiment_specification_table(self,
                                              document_id,
                                              experiment_specification_table,
                                              location=None):
        if location is None:
            location = self.doc_accessor.create_end_of_segment_location()
        num_of_rows = experiment_specification_table.num_of_rows()
        num_of_cols = experiment_specification_table.num_of_columns()
        self.doc_accessor.create_table(document_id,
                                       num_of_rows,
                                       num_of_cols,
                                       additional_properties=location)
        lab_experiment = LabExperiment(document_id)
        lab_experiment.load_from_google_doc()
        table_template = lab_experiment.tables()[-1]
        ip_table_template = self.ip_table_factory.from_google_doc(
            table_template)

        cells_to_update = []
        experiment_ids = list(experiment_specification_table.experiment_id_to_status_table().keys())
        table_references = list(experiment_specification_table.experiment_id_to_status_table().values())

        for row_index in reversed(range(2, num_of_rows)):
            cells_to_update.append(
                self.update_cell_text(ip_table_template,
                                      'Table %d' % table_references.pop(),
                                      row_index, 1))
            cells_to_update.append(
                self.update_cell_text(ip_table_template, experiment_ids.pop(),
                                      row_index, 0))

        cells_to_update.extend(
            self._write_experiment_specification_header_row(ip_table_template))
        cells_to_update.extend(
            self._write_experiment_specification_table_caption(
                ip_table_template,
                experiment_specification_table.get_table_caption()))
        response = self.doc_accessor.execute_batch_request(
            cells_to_update, document_id)

    def _write_experiment_specification_header_row(self, ip_table_template):
        return [
            self.update_cell_text(ip_table_template, 'Experiment Status', 1,
                                  1),
            self.update_cell_text(ip_table_template, 'Experiment ID', 1, 0)
        ]

    def _write_experiment_specification_table_caption(self, ip_table_template,
                                                      table_index):
        caption_row = self.update_cell_text(
            ip_table_template,
            'Table %d: Experiment Specification' % table_index, 0, 0)
        merge_caption_cell = self.doc_accessor.merge_table_cells(
            1, 2, ip_table_template.get_table_start_index(), 0, 0)
        return [merge_caption_cell, caption_row]

    def update_experiment_specification_table(self, document_id,
                                              experiment_specification_table,
                                              new_spec_table):
        intent_parser_table = experiment_specification_table.get_intent_parser_table()
        delete_content = self.doc_accessor.delete_content(
            intent_parser_table.get_table_start_index(),
            intent_parser_table.get_table_end_index())
        self.doc_accessor.execute_batch_request([delete_content], document_id)
        new_spec_table.set_table_caption(
            experiment_specification_table.get_table_caption())
        return self.create_experiment_specification_table(
            document_id, new_spec_table)

    def update_experiment_status_table(self, document_id,
                                       experiment_status_table,
                                       db_statuses_table):
        intent_parser_table = experiment_status_table.get_intent_parser_table()
        delete_content = self.doc_accessor.delete_content(
            intent_parser_table.get_table_start_index(),
            intent_parser_table.get_table_end_index())
        self.doc_accessor.execute_batch_request([delete_content], document_id)
        db_statuses_table.set_table_caption(
            experiment_status_table.get_table_caption())
        return self.create_experiment_status_table(document_id,
                                                   db_statuses_table)

    def create_experiment_status_table(self,
                                       document_id,
                                       experiment_status_table,
                                       location=None):
        if location is None:
            location = self.doc_accessor.create_end_of_segment_location()
        num_of_rows = experiment_status_table.num_of_rows()
        num_of_cols = experiment_status_table.num_of_columns()

        self.doc_accessor.create_table(document_id,
                                       num_of_rows,
                                       num_of_cols,
                                       additional_properties=location)

        lab_experiment = LabExperiment(document_id)
        lab_experiment.load_from_google_doc()
        table_template = lab_experiment.tables()[-1]
        ip_table_template = self.ip_table_factory.from_google_doc(
            table_template)

        cells_to_update = []
        list_of_status = experiment_status_table.get_statuses()

        # Walk the data rows bottom-up; rows 0 and 1 hold the caption and header.
        status_index = len(list_of_status) - 1
        for row_index in reversed(range(2, num_of_rows)):
            status = list_of_status[status_index]

            # Processed column
            cells_to_update.append(
                self.update_cell_text(ip_table_template, status.get_state(),
                                      row_index, 3))
            # Pipeline output column
            cells_to_update.append(
                self.update_cell_text(ip_table_template, status.get_path(),
                                      row_index, 2))
            # Last update column
            cells_to_update.append(
                self.update_cell_text(ip_table_template,
                                      status.get_last_updated(), row_index, 1))
            # Type column
            cells_to_update.append(
                self.update_cell_text(ip_table_template,
                                      status.get_status_type(), row_index, 0))
            status_index -= 1

        cells_to_update.extend(
            self._write_experiment_status_header_row(ip_table_template))
        cells_to_update.extend(
            self._write_experiment_status_table_caption(
                ip_table_template,
                experiment_status_table.get_table_caption()))
        self.doc_accessor.execute_batch_request(cells_to_update, document_id)

    def _write_experiment_status_header_row(self, ip_table_template):
        return [
            self.update_cell_text(ip_table_template, 'Processed', 1, 3),
            self.update_cell_text(ip_table_template, 'Output From Pipeline', 1, 2),
            self.update_cell_text(ip_table_template, 'Last Update', 1, 1),
            self.update_cell_text(ip_table_template, 'Pipeline Status', 1, 0)
        ]

    def _write_experiment_status_table_caption(self, ip_table_template,
                                               table_number):
        # table_number comes from the existing table's caption and numbers the
        # rewritten caption.
        caption_row = self.update_cell_text(
            ip_table_template,
            'Table %d: Experiment status' % table_number, 0, 0)
        merge_caption_cell = self.doc_accessor.merge_table_cells(
            1, 4, ip_table_template.get_table_start_index(), 0, 0)
        return [merge_caption_cell, caption_row]

    def update_cell_text(self, ip_table_template, text, row_index, col_index):
        # Build a request that overwrites the text of the template cell at
        # (row_index, col_index).
        template_cell = ip_table_template.get_cell(row_index, col_index)
        start_pos = template_cell.get_start_index()
        end_pos = template_cell.get_end_index()
        return self.doc_accessor.insert_text(text, start_pos, end_pos)

    def delete_tables(self, ip_tables, document_id):
        # Delete from the highest table start index to the lowest so that each
        # deletion leaves the start indices of the remaining tables valid.
        order_to_delete = [
            self.doc_accessor.delete_content(
                ip_table.get_table_start_index(),
                ip_table.get_table_end_index())
            for ip_table in sorted(
                ip_tables,
                key=lambda table: table.get_table_start_index(),
                reverse=True)
        ]
        self.doc_accessor.execute_batch_request(order_to_delete, document_id)
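
A note on ordering in delete_tables: Google Docs batch updates apply sequentially, and deleting a range shifts every index that follows it, so deletions are issued from the highest start index to the lowest. A minimal sketch of the same idea over a plain string, with hypothetical (start, end) ranges standing in for the real table objects:

def delete_ranges(text, ranges):
    # Delete highest-start-first so earlier ranges keep valid indices.
    for start, end in sorted(ranges, reverse=True):
        text = text[:start] + text[end:]
    return text

# 'bc' and 'fg' are removed without the second range drifting.
assert delete_ranges('abcdefgh', [(1, 3), (5, 7)]) == 'adeh'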
Example #18
0
 def __init__(self):
     self.doc_accessor = GoogleAccessor().get_google_doc_accessor()
     self.ip_table_factory = IntentParserTableFactory()
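
For orientation, GoogleAccessor appears throughout these examples as a facade that hands out one accessor per Google service. A minimal usage sketch, limited to the getters already shown in this listing:

# Sketch only: each getter below appears in the surrounding examples.
accessor = GoogleAccessor()
doc_accessor = accessor.get_google_doc_accessor()                   # Google Docs
drive_accessor = accessor.get_google_drive_accessor()               # Google Drive
spreadsheet_accessor = accessor.get_google_spreadsheet_accessor()   # Google Sheets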
Example #19
0
    def setUp(self):
        """
        Configure an instance of IntentParserServer for spellcheck testing.
        """

        # Clear all link preferences
        if os.path.exists(IntentParserServer.LINK_PREF_PATH):
            for file in os.listdir(IntentParserServer.LINK_PREF_PATH):
                os.remove(os.path.join(IntentParserServer.LINK_PREF_PATH,
                                       file))
            os.rmdir(IntentParserServer.LINK_PREF_PATH)

        self.doc_content = None
        with open(os.path.join(self.dataDir, self.spellcheckFile), 'r') as fin:
            self.doc_content = json.loads(fin.read())

        if self.doc_content is None:
            self.fail('Failed to read in test document! Path: ' +
                      os.path.join(self.dataDir, self.spellcheckFile))

        self.doc_id = '1xMqOx9zZ7h2BIxSdWp2Vwi672iZ30N_2oPs8rwGUoTA'
        self.user = '******'
        self.user_email = '*****@*****.**'
        self.json_body = {
            'documentId': self.doc_id,
            'user': self.user,
            'userEmail': self.user_email
        }

        self.google_accessor = GoogleAccessor.create()
        self.template_spreadsheet_id = '1r3CIyv75vV7A7ghkB0od-TM_16qSYd-byAbQ1DhRgB0'
        self.spreadsheet_id = self.google_accessor.copy_file(
            file_id=self.template_spreadsheet_id,
            new_title='Intent Parser Server Test Sheet')

        self.sbh_collection_uri = 'https://hub-staging.sd2e.org/user/sd2e/src/intent_parser_collection/1'

        curr_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(curr_path, 'sbh_creds.json'), 'r') as file:
            creds = json.load(file)
            self.sbh_username = creds['username']
            self.sbh_password = creds['password']

        self.ips = IntentParserServer(
            bind_port=8081,
            bind_ip='0.0.0.0',
            sbh_collection_uri=self.sbh_collection_uri,
            spreadsheet_id=self.spreadsheet_id,
            sbh_username=self.sbh_username,
            sbh_password=self.sbh_password)
        self.ips.initialize_server()
        self.ips.start(background=True)

        self.ips.analyze_processing_map_lock = Mock()
        self.ips.client_state_lock = Mock()
        self.ips.client_state_map = {}
        self.ips.google_accessor = Mock()
        self.ips.google_accessor.get_document = Mock(
            return_value=self.doc_content)
        self.ips.send_response = Mock()
        self.ips.get_json_body = Mock(return_value=self.json_body)
        self.ips.analyze_processing_map = {}
        self.ips.analyze_processing_lock = {}

        self.ips.item_map_lock = Mock()
        with open(os.path.join(self.dataDir, self.items_json), 'r') as fin:
            self.ips.item_map = json.load(fin)

        self.ips.process_analyze_document([], [])
        pa_results = json.loads(self.ips.send_response.call_args[0][2])
        actions = pa_results['actions']
        self.assertTrue(actions[0]['action'] == 'showProgressbar')

        startTime = time.time()
        while actions[0]['action'] != 'highlightText' and (
                time.time() - startTime < 100):
            self.ips.process_analyze_document([], [])
            pa_results = json.loads(self.ips.send_response.call_args[0][2])
            actions = pa_results['actions']
            self.assertTrue(actions[0]['action'] == 'highlightText'
                            or actions[0]['action'] == 'updateProgress')
            time.sleep(0.25)

        self.assertTrue(actions[0]['action'] == 'highlightText')
        self.assertTrue(actions[1]['action'] == 'showSidebar')

        # Code to generate GT search results, for when test doc is updated
        #with open(os.path.join(self.dataDir, self.searchResults), 'wb') as fout:
        #    pickle.dump(self.ips.client_state_map[self.doc_id]['search_results'], fout)

        self.search_gt = None
        with open(os.path.join(self.dataDir, self.searchResults), 'rb') as fin:
            self.search_gt = pickle.load(fin)

        if self.search_gt is None:
            self.fail('Failed to read in search results! Path: ' +
                      os.path.join(self.dataDir, self.searchResults))

        compare_search_results(
            self.search_gt,
            self.ips.client_state_map[self.doc_id]['search_results'])
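
The setUp above repeatedly calls process_analyze_document until the server reports 'highlightText' or a 100-second deadline passes. That wait-with-timeout pattern can be factored into a small helper; a sketch, where poll and is_done are hypothetical callables standing in for the server round-trip and the completion check:

import time

def wait_until(poll, is_done, timeout=100, interval=0.25):
    # Call poll() until is_done(result) is true or the timeout elapses.
    deadline = time.time() + timeout
    result = poll()
    while not is_done(result) and time.time() < deadline:
        time.sleep(interval)
        result = poll()
    return result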
Example #20
0
class GoldenFileTest(unittest.TestCase):
    """
    Test a selection of Google docs by generating a structured request for each document and comparing the result to its expected result. 
    Each document are retrieved from  GoogleAccessor by using these document id and its revision id.
    The selected documents come from SD2 cp-request repository. 
    The document id and the revision id are recorded in cp-request/input/structured_request directory.
    Once the document has been retrieved, it is passed into intent parser to generate a structured request. 
    The structured request is then compared with the structured_request result for equivalency.
    """
    @classmethod
    def setUpClass(cls):
        pass

    @patch('intent_parser.accessor.intent_parser_sbh.IntentParserSBH')
    def setUp(self, mock_intent_parser_sbh):
        curr_path = os.path.dirname(os.path.realpath(__file__))
        self.data_dir = os.path.join(curr_path, 'data')
        self.mock_data_dir = os.path.join(self.data_dir, 'mock_data')
        with open(os.path.join(self.data_dir, 'authn.json'), 'r') as file:
            self.authn = json.load(file)['authn']

        self.drive_accessor = GoogleAccessor().get_google_drive_accessor()
        self.maxDiff = None

        self.mock_intent_parser_sbh = mock_intent_parser_sbh
        self.sbol_dictionary = SBOLDictionaryAccessor(
            intent_parser_constants.SD2_SPREADSHEET_ID,
            self.mock_intent_parser_sbh)
        self.sbol_dictionary.initial_fetch()
        datacatalog_config = {
            "mongodb": {
                "database": "catalog_staging",
                "authn": self.authn
            }
        }
        self.intentparser_factory = IntentParserFactory(
            datacatalog_config, self.mock_intent_parser_sbh,
            self.sbol_dictionary)
        self.uploaded_file_id = ''

    def test_YeastSTATES_1_0_Growth_Curves_Request(self):
        file = 'YeastSTATES 1.0 Growth Curves Request.json'
        file_path = os.path.join(self.mock_data_dir, file)
        self._compare_structured_requests(file_path)

    def test_YeastSTATES_1_0_Time_Series_Round_1(self):
        file = 'YeastSTATES 1.0 Time Series Round 1.json'
        file_path = os.path.join(self.mock_data_dir, file)
        self._compare_structured_requests(file_path)

    def test_CP_NovelChassis_Endogenous_Promoter_Blue_1_21(self):
        file = 'CP_NovelChassis_Endogenous_Promoter_Blue_1_21.json'
        file_path = os.path.join(self.mock_data_dir, file)
        self._compare_structured_requests(file_path)

    def _compare_structured_requests(self, document):
        golden_structured_request = intent_parser_utils.load_json_file(
            document)
        golden_doc_url = golden_structured_request['experiment_reference_url']
        doc_id = intent_parser_utils.get_google_doc_id(golden_doc_url)

        if 'doc_revision_id' not in golden_structured_request:
            self.fail('No document revision specified')

        doc_revision_id = golden_structured_request['doc_revision_id']

        upload_mimetype = google_constants.GOOGLE_DOC_MIMETYPE
        download_mimetype = google_constants.WORD_DOC_MIMETYPE

        response = self.drive_accessor.get_file_with_revision(
            doc_id, doc_revision_id, download_mimetype)

        drive_folder_test_dir = '1693MJT1Up54_aDUp1s3mPH_DRw1_GS5G'
        self.uploaded_file_id = self.drive_accessor.upload_revision(
            golden_structured_request['name'],
            response.content,
            drive_folder_test_dir,
            download_mimetype,
            title=golden_structured_request['name'],
            target_format=upload_mimetype)
        print('%s upload doc %s' %
              (datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
               self.uploaded_file_id))

        intent_parser = self.intentparser_factory.create_intent_parser(
            self.uploaded_file_id)
        intent_parser.process_structure_request()
        generated_structured_request = intent_parser.get_structured_request()

        # Skip fields that external resources modify:
        # experiment_reference, challenge_problem, doc_revision_id, and experiment_id.
        self.assertEqual(
            'https://docs.google.com/document/d/%s' % self.uploaded_file_id,
            generated_structured_request['experiment_reference_url'])
        self.assertEqual(golden_structured_request['lab'],
                         generated_structured_request['lab'])
        self.assertEqual(golden_structured_request['name'],
                         generated_structured_request['name'])
        self._compare_runs(golden_structured_request['runs'],
                           generated_structured_request['runs'])
        if 'parameters' in golden_structured_request:
            self.assertEqual(golden_structured_request['parameters'],
                             generated_structured_request['parameters'])

    def _compare_runs(self, golden, generated):
        # Placeholder for stripping run/measurement fields that intent parser
        # does not yet support; no fields are currently removed, so the full
        # structures are compared directly.
        self.assertEqual(golden, generated)

    def tearDown(self):
        if self.uploaded_file_id:
            self.drive_accessor.delete_file(self.uploaded_file_id)
            print('%s delete doc %s' %
                  (datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
                   self.uploaded_file_id))

    @classmethod
    def tearDownClass(cls):
        pass
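
The golden-file pattern in Example #20 generalizes well: keep an expected output alongside its input, regenerate the output in the test, and compare only the fields the pipeline does not rewrite. A self-contained sketch, with generate_output as a hypothetical stand-in for the intent parser:

import unittest

def generate_output(request):
    # Hypothetical stand-in for intent_parser.get_structured_request();
    # it echoes stable fields and rewrites the revision id.
    return {'name': request['name'], 'lab': request['lab'], 'doc_revision_id': 'r2'}

class GoldenSketchTest(unittest.TestCase):
    def test_against_golden(self):
        golden = {'name': 'demo', 'lab': 'demo_lab', 'doc_revision_id': 'r1'}
        generated = generate_output(golden)
        # doc_revision_id is rewritten by the pipeline, so it is skipped.
        for field in ('name', 'lab'):
            self.assertEqual(golden[field], generated[field])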