Esempio n. 1
0
    def get(self, ignore):
        """Write the study listing for the current user as compact JSON.

        Reads the ``user``, ``query`` and ``sEcho`` request arguments. A
        non-empty ``query`` is run through ``QiitaStudySearch`` and the
        processed-data results are handed to ``_build_study_info``; an empty
        ``query`` passes ``None`` for both search results.

        Search failures are reported by writing a status code and a message
        to the response (400 for a malformed query or an incompatible
        datatype, 500 for anything else) and returning early.

        Parameters
        ----------
        ignore : object
            Not used by this handler.

        Raises
        ------
        HTTPError
            403 if the ``user`` argument does not match the current user id.
        ValueError
            If the ``sEcho`` argument cannot be converted to ``int``.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')

        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # str(e) rather than ''.join(e): exception instances are not
                # iterable on Python 3, so the join itself would raise.
                self.write(str(e))
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime',
                                str(e),
                                info={
                                    'User': self.current_user.id,
                                    'query': query
                                })
                return
        else:
            study_proc = proc_samples = None

        info = _build_study_info(self.current_user,
                                 study_proc=study_proc,
                                 proc_samples=proc_samples)
        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len(info),
            "iTotalDisplayRecords": len(info),
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
Esempio n. 2
0
    def get(self, ignore):
        """Write the study listing for the current user as compact JSON.

        Reads the ``user``, ``query`` and ``sEcho`` request arguments. A
        non-empty ``query`` is run through ``QiitaStudySearch`` and the
        processed-data results are handed to ``_build_study_info``; an empty
        ``query`` passes ``None`` for both search results.

        Search failures are reported by writing a status code and a message
        to the response (400 for a malformed query or an incompatible
        datatype, 500 for anything else) and returning early.

        Parameters
        ----------
        ignore : object
            Not used by this handler.

        Raises
        ------
        HTTPError
            403 if the ``user`` argument does not match the current user id.
        ValueError
            If the ``sEcho`` argument cannot be converted to ``int``.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')

        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # str(e) rather than ''.join(e): exception instances are not
                # iterable on Python 3, so the join itself would raise.
                self.write(str(e))
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime', str(e),
                                info={'User': self.current_user.id,
                                      'query': query})
                return
        else:
            study_proc = proc_samples = None

        info = _build_study_info(self.current_user, study_proc=study_proc,
                                 proc_samples=proc_samples)
        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len(info),
            "iTotalDisplayRecords": len(info),
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
Esempio n. 3
0
    def test_filter_by_processed_data(self):
        """Check the three values returned by filter_by_processed_data
        after searching for ``study_id = 1``."""
        search = QiitaStudySearch()
        # The search is run before filtering, as in the handlers; its own
        # return value is not inspected by this test.
        search('study_id = 1', User('*****@*****.**'))
        spid, pds, meta = search.filter_by_processed_data()
        exp_spid = {1: {'18S': [1]}}
        exp_pds = {1: [
            '1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', '1.SKB4.640189',
            '1.SKB5.640181', '1.SKB6.640176', '1.SKB7.640196', '1.SKB8.640193',
            '1.SKB9.640200', '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198',
            '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', '1.SKD7.640191',
            '1.SKD8.640184', '1.SKD9.640182', '1.SKM1.640183', '1.SKM2.640199',
            '1.SKM3.640197', '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187',
            '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192']}
        # One row per expected sample id, with a single 'study_id' column
        # holding 1.
        exp_meta = pd.DataFrame.from_dict({x: 1 for x in exp_pds[1]},
                                          orient='index')
        exp_meta.rename(columns={0: 'study_id'}, inplace=True)

        self.assertEqual(spid, exp_spid)
        self.assertEqual(pds, exp_pds)
        # list(...) so the comparison also holds on Python 3, where keys()
        # returns a view that never compares equal to a list.
        self.assertEqual(list(meta.keys()), [1])
        assert_frame_equal(meta[1], exp_meta)
Esempio n. 4
0
    def get(self, ignore):
        """Write the linkified study listing for the current user as JSON.

        Reads the ``user``, ``query``, ``search_type`` and ``sEcho`` request
        arguments. A non-empty ``query`` is run through ``QiitaStudySearch``
        and the processed-data results are handed to ``_build_study_info``;
        an empty ``query`` passes ``None`` for both search results. Each
        returned row is then rewritten in place: people, publications and EBI
        accessions are replaced by the output of the linkifier helpers.

        Search failures are reported by writing a status code and a message
        to the response (400 for a malformed query or an incompatible
        datatype, 500 for anything else) and returning early.

        Parameters
        ----------
        ignore : object
            Not used by this handler.

        Raises
        ------
        HTTPError
            403 if the ``user`` argument does not match the current user id;
            400 if ``search_type`` is neither ``'user'`` nor ``'public'``.
        ValueError
            If the ``sEcho`` argument cannot be converted to ``int``.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        search_type = self.get_argument('search_type')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, 'Unauthorized search!')
        if search_type not in ['user', 'public']:
            raise HTTPError(400, 'Not a valid search type')

        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # str(e) rather than ''.join(e): exception instances are not
                # iterable on Python 3, so the join itself would raise.
                self.write(str(e))
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime', str(e),
                                info={'User': self.current_user.id,
                                      'query': query})
                return
        else:
            study_proc = proc_samples = None

        info = _build_study_info(self.current_user, search_type, study_proc,
                                 proc_samples)

        # linkifying data
        for row in info:
            row['shared'] = ", ".join([study_person_linkifier(element)
                                       for element in row['shared']])

            # The two publication id lists are merged into a single 'pubs'
            # entry and the raw lists are removed from the row.
            ppid = [pubmed_linkifier([p]) for p in row['publication_pid']]
            pdoi = [doi_linkifier([p]) for p in row['publication_doi']]
            del row['publication_pid']
            del row['publication_doi']
            row['pubs'] = ', '.join(ppid + pdoi)

            row['pi'] = study_person_linkifier(row['pi'])

            # Default to the bare submission status; prefix it with the
            # linkified accessions when the study has any.
            row['ebi_info'] = row['ebi_submission_status']
            ebi_study_accession = row['ebi_study_accession']
            if ebi_study_accession:
                row['ebi_info'] = '%s (%s)' % (
                    ''.join([EBI_LINKIFIER.format(a)
                             for a in ebi_study_accession.split(',')]),
                    row['ebi_submission_status'])

        # build the table json
        len_info = len(info)
        results = {
            "sEcho": echo,
            "iTotalRecords": len_info,
            "iTotalDisplayRecords": len_info,
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
Esempio n. 5
0
 def setUp(self):
     """Bind a new QiitaStudySearch instance to ``self.search``."""
     searcher = QiitaStudySearch()
     self.search = searcher
Esempio n. 6
0
class SearchTest(TestCase):
    """Tests that the search object works as expected"""

    # Append the per-case ``msg`` to unittest's own failure text instead of
    # replacing it, so a failing case shows both the query and the diff.
    longMessage = True

    # Portal clause expected as the last INTERSECT member of every
    # study-level query.
    _PORTAL_SQL = ("SELECT study_id from qiita.study_portal "
                   "JOIN qiita.portal_type USING (portal_type_id) "
                   "WHERE portal = 'QIITA'")
    # Column type lists exactly as they appear inside ``column_type in (...)``
    _NUMERIC = "'integer', 'float8'"
    _TEXT = "'varchar'"

    def setUp(self):
        """Bind a new QiitaStudySearch instance to ``self.search``."""
        self.search = QiitaStudySearch()

    @staticmethod
    def _column_sql(column, types):
        """Return the expected study-level clause for one metadata column."""
        return ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                "lower(column_name) = lower('%s') and column_type in (%s)"
                % (column, types))

    @staticmethod
    def _sample_sql(columns, where):
        """Return the expected sample-level query.

        ``columns`` is the comma separated select list that follows
        ``ss.sample_id`` and ``where`` is the body of the WHERE clause. The
        ``{0}`` placeholder is part of the expected text and is left as is.
        """
        return ("SELECT ss.sample_id,%s "
                "FROM qiita.study_sample ss "
                "JOIN qiita.sample_{0} sa "
                "ON ss.sample_id = sa.sample_id "
                "JOIN qiita.study st ON st.study_id = ss.study_id "
                "WHERE %s" % (columns, where))

    def test_parse_study_search_string(self):
        """Check the SQL and metadata columns produced for search strings."""
        # (search string, metadata column, column types, WHERE clause body)
        cases = [
            ("altitude > 0", "altitude", self._NUMERIC, "sa.altitude > 0"),
            # test NOT
            ("NOT altitude > 0", "altitude", self._NUMERIC,
             "NOT sa.altitude > 0"),
            # test AND
            ("ph > 7 and ph < 9", "ph", self._NUMERIC,
             "(sa.ph > 7 AND sa.ph < 9)"),
            # test OR
            ("ph > 7 or ph < 9", "ph", self._NUMERIC,
             "(sa.ph > 7 OR sa.ph < 9)"),
            # test includes
            ('host_subject_id includes "Chicken little"', "host_subject_id",
             self._TEXT,
             "LOWER(sa.host_subject_id) LIKE '%chicken little%'"),
            # test complex query
            ('name = "Billy Bob" or name = "Timmy" or name=Jimbo and '
             'name > 25 or name < 5', "name", self._TEXT,
             "(sa.name = 'Billy Bob' OR sa.name = 'Timmy' OR "
             "(sa.name = 'Jimbo' AND sa.name > 25) OR sa.name < 5)"),
        ]
        for query, column, types, where in cases:
            st_sql, samp_sql, meta = \
                self.search._parse_study_search_string(query)
            exp_st_sql = "%s INTERSECT %s" % (
                self._column_sql(column, types), self._PORTAL_SQL)
            exp_samp_sql = self._sample_sql("sa." + column, where)
            self.assertEqual(st_sql, exp_st_sql, msg=query)
            self.assertEqual(samp_sql, exp_samp_sql, msg=query)
            self.assertEqual(meta, [column], msg=query)

        # test case sensitivity
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("ph > 7 or pH < 9")
        # Split the study query into its INTERSECT members so each clause is
        # compared on its own.
        # NOTE(review): the comparison is positional, as before, so it
        # assumes the clauses come back in this order - confirm that the
        # parser guarantees it.
        exp_st_sql = [self._column_sql("ph", self._NUMERIC),
                      self._column_sql("pH", self._NUMERIC),
                      self._PORTAL_SQL]
        exp_samp_sql = self._sample_sql("sa.pH,sa.ph",
                                        "(sa.ph > 7 OR sa.ph < 9)")
        self.assertEqual(st_sql.split(" INTERSECT "), exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(len(meta), 2)
        # assertIn instead of a bare assert, which is stripped under -O
        self.assertIn("ph", meta)
        self.assertIn("pH", meta)

    def test_call(self):
        """Check the results and metadata columns of a full search call."""
        obs_res, obs_meta = self.search(
            '(sample_type = ENVO:soil AND COMMON_NAME = "rhizosphere '
            'metagenome" ) AND NOT Description_duplicate includes Burmese',
            User("*****@*****.**"))
        exp_meta = ["COMMON_NAME", "Description_duplicate", "sample_type"]
        exp_res = {1:
                   [['1.SKM4.640180', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['1.SKM5.640177', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['1.SKD4.640185', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil'],
                    ['1.SKD6.640190', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil'],
                    ['1.SKM6.640187', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['1.SKD5.640186', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil']]}
        self.assertEqual(obs_res, exp_res)
        self.assertEqual(obs_meta, exp_meta)

    def test_call_bad_meta_category(self):
        """A search on an unknown metadata column returns no results."""
        obs_res, obs_meta = self.search(
            'BAD_NAME_THING = ENVO:soil', User("*****@*****.**"))
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ["BAD_NAME_THING"])

    def test_call_no_results(self):
        """makes sure a call on a required sample ID column that has no results
        actually returns no results"""
        obs_res, obs_meta = self.search('sample_type = unicorns_and_rainbows',
                                        User('*****@*****.**'))
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ['sample_type'])

    def test_filter_by_processed_data(self):
        """Check the three values returned by filter_by_processed_data
        after searching for ``study_id = 1``."""
        search = QiitaStudySearch()
        # The search is run before filtering; its own return value is not
        # inspected by this test.
        search('study_id = 1', User('*****@*****.**'))
        spid, pds, meta = search.filter_by_processed_data()
        exp_spid = {1: {'18S': [1]}}
        exp_pds = {1: [
            '1.SKB1.640202', '1.SKB2.640194', '1.SKB3.640195', '1.SKB4.640189',
            '1.SKB5.640181', '1.SKB6.640176', '1.SKB7.640196', '1.SKB8.640193',
            '1.SKB9.640200', '1.SKD1.640179', '1.SKD2.640178', '1.SKD3.640198',
            '1.SKD4.640185', '1.SKD5.640186', '1.SKD6.640190', '1.SKD7.640191',
            '1.SKD8.640184', '1.SKD9.640182', '1.SKM1.640183', '1.SKM2.640199',
            '1.SKM3.640197', '1.SKM4.640180', '1.SKM5.640177', '1.SKM6.640187',
            '1.SKM7.640188', '1.SKM8.640201', '1.SKM9.640192']}
        # One row per expected sample id, with a single 'study_id' column
        # holding 1.
        exp_meta = pd.DataFrame.from_dict({x: 1 for x in exp_pds[1]},
                                          orient='index')
        exp_meta.rename(columns={0: 'study_id'}, inplace=True)

        self.assertEqual(spid, exp_spid)
        self.assertEqual(pds, exp_pds)
        # list(...) so the comparison also holds on Python 3, where keys()
        # returns a view that never compares equal to a list.
        self.assertEqual(list(meta.keys()), [1])
        assert_frame_equal(meta[1], exp_meta)
Esempio n. 7
0
class SearchTest(TestCase):
    """Tests that the search object works as expected"""

    # Append the per-case ``msg`` to unittest's own failure text instead of
    # replacing it, so a failing case shows both the query and the diff.
    longMessage = True

    # Column type lists exactly as they appear inside ``column_type in (...)``
    _NUMERIC = "'integer', 'float8'"
    _TEXT = "'varchar'"

    def setUp(self):
        """Bind a new QiitaStudySearch instance to ``self.search``."""
        self.search = QiitaStudySearch()

    @staticmethod
    def _column_sql(column, types):
        """Return the expected study-level clause for one metadata column."""
        return ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                "lower(column_name) = lower('%s') and column_type in (%s)"
                % (column, types))

    @staticmethod
    def _sample_sql(columns, where):
        """Return the expected sample-level query.

        ``columns`` is the comma separated select list that follows
        ``r.sample_id`` and ``where`` is the body of the WHERE clause. The
        ``{0}`` placeholder is part of the expected text and is left as is.
        """
        return ("SELECT r.sample_id,%s FROM "
                "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                " st.study_id = r.study_id WHERE %s" % (columns, where))

    def test_parse_study_search_string(self):
        """Check the SQL and metadata columns produced for search strings."""
        # (search string, expected study SQL, select list, WHERE clause body,
        #  metadata column)
        cases = [
            ("altitude > 0", self._column_sql("altitude", self._NUMERIC),
             "sa.altitude", "sa.altitude > 0", "altitude"),
            # test NOT
            ("NOT altitude > 0", self._column_sql("altitude", self._NUMERIC),
             "sa.altitude", "NOT sa.altitude > 0", "altitude"),
            # test AND
            ("ph > 7 and ph < 9", self._column_sql("ph", self._NUMERIC),
             "sa.ph", "(sa.ph > 7 AND sa.ph < 9)", "ph"),
            # test OR
            ("ph > 7 or ph < 9", self._column_sql("ph", self._NUMERIC),
             "sa.ph", "(sa.ph > 7 OR sa.ph < 9)", "ph"),
            # test includes: here the expected study SQL carries no column
            # filter and the column is read through the ``r`` alias
            ('host_subject_id includes "Chicken little"',
             "SELECT study_id FROM qiita.study_sample_columns",
             "r.host_subject_id",
             "LOWER(r.host_subject_id) LIKE '%chicken little%'",
             "host_subject_id"),
            # test complex query
            ('name = "Billy Bob" or name = "Timmy" or name=Jimbo and '
             'name > 25 or name < 5',
             self._column_sql("name", self._TEXT),
             "sa.name",
             "(sa.name = 'Billy Bob' OR sa.name = 'Timmy' OR "
             "(sa.name = 'Jimbo' AND sa.name > 25) OR sa.name < 5)",
             "name"),
        ]
        for query, exp_st_sql, columns, where, column in cases:
            st_sql, samp_sql, meta = \
                self.search._parse_study_search_string(query)
            self.assertEqual(st_sql, exp_st_sql, msg=query)
            self.assertEqual(samp_sql, self._sample_sql(columns, where),
                             msg=query)
            self.assertEqual(meta, [column], msg=query)

        # test case sensitivity
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("ph > 7 or pH < 9")
        # need to split sql because set used to create so can't guarantee order
        st_sql = st_sql.split(" INTERSECT ")
        exp_st_sql = [self._column_sql("pH", self._NUMERIC),
                      self._column_sql("ph", self._NUMERIC)]
        exp_samp_sql = self._sample_sql("sa.pH,sa.ph",
                                        "(sa.ph > 7 OR sa.ph < 9)")
        # Order-independent comparison of the INTERSECT members. An explicit
        # assertion reports a mismatch as a test failure with a diff, rather
        # than as a ValueError raised by list.index().
        self.assertEqual(sorted(st_sql), sorted(exp_st_sql))
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(len(meta), 2)
        # assertIn instead of a bare assert, which is stripped under -O
        self.assertIn("ph", meta)
        self.assertIn("pH", meta)

    def test_call(self):
        """Check the results and metadata columns of a full search call."""
        obs_res, obs_meta = self.search(
            '(sample_type = ENVO:soil AND COMMON_NAME = "rhizosphere '
            'metagenome" ) AND NOT Description_duplicate includes Burmese',
            "*****@*****.**")
        exp_meta = ["COMMON_NAME", "Description_duplicate", "sample_type"]
        exp_res = {1:
                   [['SKM4.640180', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['SKM5.640177', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['SKD4.640185', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil'],
                    ['SKD6.640190', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil'],
                    ['SKM6.640187', 'rhizosphere metagenome', 'Bucu Rhizo',
                     'ENVO:soil'],
                    ['SKD5.640186', 'rhizosphere metagenome', 'Diesel Rhizo',
                     'ENVO:soil']]}
        self.assertEqual(obs_res, exp_res)
        self.assertEqual(obs_meta, exp_meta)

    def test_call_bad_meta_category(self):
        """A search on an unknown metadata column returns no results."""
        obs_res, obs_meta = self.search(
            'BAD_NAME_THING = ENVO:soil', "*****@*****.**")
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ["BAD_NAME_THING"])

    def test_call_no_results(self):
        """makes sure a call on a required sample ID column that has no results
        actually returns no results"""
        obs_res, obs_meta = self.search('sample_type = unicorns_and_rainbows',
                                        '*****@*****.**')
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ['sample_type'])
Esempio n. 8
0
 def setUp(self):
     """Bind a new QiitaStudySearch instance to ``self.search``."""
     searcher = QiitaStudySearch()
     self.search = searcher
Esempio n. 9
0
class SearchTest(TestCase):
    """Tests that the search object works as expected"""
    def setUp(self):
        """Create a new QiitaStudySearch and store it on ``self.search``."""
        self.search = QiitaStudySearch()

    def test_parse_study_search_string(self):
        """Check the study SQL, sample SQL and metadata names produced by
        _parse_study_search_string for a range of query shapes"""
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("altitude > 0")
        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                      "lower(column_name) = lower('altitude') and column_type "
                      "in ('integer', 'float8')")
        exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE sa.altitude > 0")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ["altitude"])

        # test NOT
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("NOT altitude > 0")
        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                      "lower(column_name) = lower('altitude') and column_type "
                      "in ('integer', 'float8')")
        exp_samp_sql = ("SELECT r.sample_id,sa.altitude FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE NOT "
                        "sa.altitude > 0")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ["altitude"])

        # test AND
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("ph > 7 and ph < 9")
        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                      "lower(column_name) = lower('ph') and column_type in "
                      "('integer', 'float8')")
        exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE (sa.ph > 7 AND "
                        "sa.ph < 9)")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ["ph"])

        # test OR
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("ph > 7 or ph < 9")
        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                      "lower(column_name) = lower('ph') and column_type in "
                      "('integer', 'float8')")
        exp_samp_sql = ("SELECT r.sample_id,sa.ph FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE (sa.ph > 7 OR "
                        "sa.ph < 9)")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ["ph"])

        # test includes
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string(
                'host_subject_id includes "Chicken little"')
        exp_st_sql = "SELECT study_id FROM qiita.study_sample_columns"
        exp_samp_sql = ("SELECT r.sample_id,r.host_subject_id FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE "
                        "LOWER(r.host_subject_id) LIKE '%chicken little%'")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ["host_subject_id"])

        # test complex query
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string(
                'name = "Billy Bob" or name = "Timmy" or name=Jimbo and '
                'name > 25 or name < 5')
        exp_st_sql = ("SELECT study_id FROM qiita.study_sample_columns WHERE "
                      "lower(column_name) = lower('name') and column_type in "
                      "('varchar')")
        exp_samp_sql = (
            "SELECT r.sample_id,sa.name FROM qiita.required_sample_info r JOIN"
            " qiita.sample_{0} sa ON sa.sample_id = r.sample_id JOIN "
            "qiita.study st ON st.study_id = r.study_id WHERE (sa.name = "
            "'Billy Bob' OR sa.name = 'Timmy' OR (sa.name = 'Jimbo' AND "
            "sa.name > 25) OR sa.name < 5)")
        self.assertEqual(st_sql, exp_st_sql)
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(meta, ['name'])

        # test case sensitivity
        st_sql, samp_sql, meta = \
            self.search._parse_study_search_string("ph > 7 or pH < 9")
        # need to split sql because set used to create so can't guarantee order
        st_sql = st_sql.split(" INTERSECT ")

        exp_st_sql = [
            "SELECT study_id FROM qiita.study_sample_columns WHERE "
            "lower(column_name) = lower('pH') and column_type in "
            "('integer', 'float8')",
            "SELECT study_id FROM "
            "qiita.study_sample_columns WHERE lower(column_name) = "
            "lower('ph') and column_type in ('integer', 'float8')"
        ]
        exp_samp_sql = ("SELECT r.sample_id,sa.pH,sa.ph FROM "
                        "qiita.required_sample_info r JOIN qiita.sample_{0} sa"
                        " ON sa.sample_id = r.sample_id JOIN qiita.study st ON"
                        " st.study_id = r.study_id WHERE (sa.ph > 7 OR "
                        "sa.ph < 9)")
        # use the split list to make sure the SQL is properly formed; the
        # pieces are compared sorted because their order is not guaranteed
        self.assertEqual(len(st_sql), 2)
        self.assertEqual(sorted(st_sql), sorted(exp_st_sql))
        self.assertEqual(samp_sql, exp_samp_sql)
        self.assertEqual(len(meta), 2)
        # assertIn instead of a bare assert: it is not stripped under -O and
        # reports the offending container on failure
        self.assertIn("ph", meta)
        self.assertIn("pH", meta)

    def test_call(self):
        """Run a combined AND / NOT query and compare rows and headers"""
        obs_res, obs_meta = self.search(
            '(sample_type = ENVO:soil AND COMMON_NAME = "rhizosphere '
            'metagenome" ) AND NOT Description_duplicate includes Burmese',
            "*****@*****.**")
        exp_meta = ["COMMON_NAME", "Description_duplicate", "sample_type"]
        exp_res = {
            1: [
                ['SKM4.640180', 'rhizosphere metagenome', 'Bucu Rhizo',
                 'ENVO:soil'],
                ['SKM5.640177', 'rhizosphere metagenome', 'Bucu Rhizo',
                 'ENVO:soil'],
                ['SKD4.640185', 'rhizosphere metagenome', 'Diesel Rhizo',
                 'ENVO:soil'],
                ['SKD6.640190', 'rhizosphere metagenome', 'Diesel Rhizo',
                 'ENVO:soil'],
                ['SKM6.640187', 'rhizosphere metagenome', 'Bucu Rhizo',
                 'ENVO:soil'],
                ['SKD5.640186', 'rhizosphere metagenome', 'Diesel Rhizo',
                 'ENVO:soil'],
            ]
        }
        self.assertEqual(obs_res, exp_res)
        self.assertEqual(obs_meta, exp_meta)

    def test_call_bad_meta_category(self):
        """Searching on the BAD_NAME_THING column should give empty results"""
        obs_res, obs_meta = self.search('BAD_NAME_THING = ENVO:soil',
                                        "*****@*****.**")
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ["BAD_NAME_THING"])

    def test_call_no_results(self):
        """makes sure a call on a required sample ID column that has no results
        actually returns no results"""
        obs_res, obs_meta = self.search('sample_type = unicorns_and_rainbows',
                                        '*****@*****.**')
        self.assertEqual(obs_res, {})
        self.assertEqual(obs_meta, ['sample_type'])
Esempio n. 10
0
    def get(self, ignore):
        """Write the study table, optionally filtered by a search, as JSON.

        Reads the ``user``, ``query``, ``search_type`` and ``sEcho`` request
        arguments. When ``query`` is non-empty a QiitaStudySearch is run and
        its processed-data filter feeds ``_build_study_info``; otherwise the
        study info is built without a filter. Several fields of every study
        are then replaced by linkified strings before the result is written.

        ``ignore`` is accepted but not used by this method.

        Raises HTTPError 403 when ``user`` is not the current user's id and
        HTTPError 400 when ``search_type`` is neither 'user' nor 'public'.
        Search failures do not raise: a 400 or 500 status and a plain-text
        message are written instead.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        search_type = self.get_argument('search_type')
        echo = int(self.get_argument('sEcho'))

        # pass the text as ``reason``: a second positional argument to
        # HTTPError is only the log message and never reaches the client
        if user != self.current_user.id:
            raise HTTPError(403, reason='Unauthorized search!')
        if search_type not in ['user', 'public']:
            raise HTTPError(400, reason='Not a valid search type')
        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # the message is rebuilt by joining the exception's items
                searchmsg = ''.join(e)
                self.write(searchmsg)
                return
            except Exception as e:
                # catch any other error as generic server error
                self.clear()
                self.set_status(500)
                self.write("Server error during search. Please try again "
                           "later")
                LogEntry.create('Runtime',
                                str(e),
                                info={
                                    'User': self.current_user.id,
                                    'query': query
                                })
                return
        else:
            study_proc = proc_samples = None
        info = _build_study_info(self.current_user, search_type, study_proc,
                                 proc_samples)
        # linkifying data
        len_info = len(info)
        for study in info:
            study['shared'] = ", ".join([
                study_person_linkifier(element)
                for element in study['shared']
            ])

            # the two publication id lists are merged into one 'pubs' string
            ppid = [pubmed_linkifier([p]) for p in study['publication_pid']]
            pdoi = [doi_linkifier([p]) for p in study['publication_doi']]
            del study['publication_pid']
            del study['publication_doi']
            study['pubs'] = ', '.join(ppid + pdoi)

            study['pi'] = study_person_linkifier(study['pi'])

            # default to the bare submission status; prefix it with the
            # accession links when an accession is present
            ebi_status = study['ebi_submission_status']
            study['ebi_info'] = ebi_status
            ebi_study_accession = study['ebi_study_accession']
            if ebi_study_accession:
                accession_links = ''.join([
                    EBI_LINKIFIER.format(a)
                    for a in ebi_study_accession.split(',')
                ])
                study['ebi_info'] = '%s (%s)' % (accession_links, ebi_status)

        # build the table json
        results = {
            "sEcho": echo,
            "iTotalRecords": len_info,
            "iTotalDisplayRecords": len_info,
            "aaData": info
        }

        # return the json in compact form to save transmit size
        self.write(dumps(results, separators=(',', ':')))
Esempio n. 11
0
    def get(self, ignore):
        """Validate the request and run the study search for ``query``.

        Reads the ``user``, ``query`` and ``sEcho`` request arguments.
        ``ignore`` is accepted but not used by this method.

        Raises HTTPError 403 when ``user`` is not the current user's id.
        Search failures do not raise: a 400 status and a plain-text message
        are written instead.
        """
        user = self.get_argument('user')
        query = self.get_argument('query')
        echo = int(self.get_argument('sEcho'))

        if user != self.current_user.id:
            raise HTTPError(403, reason='Unauthorized search!')
        # NOTE(review): the incoming side of the unresolved merge also
        # rejected unknown ``search_type`` values, but this method never
        # reads a ``search_type`` argument, so that check would raise
        # NameError and is left out here -- confirm when finishing the merge.
        if query:
            # Search for samples matching the query
            search = QiitaStudySearch()
            try:
                search(query, self.current_user)
                study_proc, proc_samples, _ = search.filter_by_processed_data()
            except ParseException:
                self.clear()
                self.set_status(400)
                self.write('Malformed search query. Please read "search help" '
                           'and try again.')
                return
            except QiitaDBIncompatibleDatatypeError as e:
                self.clear()
                self.set_status(400)
                # the message is rebuilt by joining the exception's items
                searchmsg = ''.join(e)
                self.write(searchmsg)
                return
Esempio n. 12
0
    def post(self):
        """Carry out the requested analysis action and render the search page.

        The ``action`` request argument selects the work done before
        ``search_studies.html`` is rendered:

        - "create": make a new analysis from the ``name`` and ``description``
          arguments and pre-fill the results with a default study query
        - "search": run the ``query`` argument through QiitaStudySearch
        - "select": add the samples parsed from the form to the analysis
        - "deselect": remove the parsed processed data / samples

        Every action except "create" loads the analysis named by the
        ``analysis-id`` argument after checking the user's access to it.
        """
        user = self.current_user
        action = self.get_argument("action")

        # values the template needs whatever the action turns out to be
        results, counts, fullcounts = {}, {}, {}
        meta_headers = []
        query = ""
        searchmsg = ""
        selsamples, selproc_data = {}, {}

        # get analysis and selected samples if exists, or create if necessary
        if action == "create":
            analysis_name = self.get_argument('name')
            analysis_description = self.get_argument('description')
            analysis = Analysis.create(User(user), analysis_name,
                                       analysis_description)
            analysis_id = analysis.id
            # set to second step since this page is second step in workflow
            analysis.step = SELECT_SAMPLES
            # fill example studies by running query for specific studies
            example_search = QiitaStudySearch()
            example_query = 'study_id = 1 OR study_id = 2 OR study_id = 3'
            results, meta_headers = example_search(example_query, user)
            results, counts, fullcounts = self._parse_search_results(
                results, selsamples, meta_headers)
        else:
            analysis_id = int(self.get_argument("analysis-id"))
            check_analysis_access(User(user), analysis_id)
            analysis = Analysis(analysis_id)
            selproc_data, selsamples = self._selected_parser(analysis)

        # run through action requested
        if action == "search":
            study_search = QiitaStudySearch()
            query = str(self.get_argument("query"))
            try:
                results, meta_headers = study_search(query, user)
            except ParseException:
                searchmsg = "Malformed search query, please read search help."
            except QiitaDBIncompatibleDatatypeError as e:
                searchmsg = ''.join(e)

            if results or searchmsg:
                results, counts, fullcounts = self._parse_search_results(
                    results, selsamples, meta_headers)
            else:
                searchmsg = "No results found."
        elif action == "select":
            analysis.add_samples(self._parse_form_select())
        elif action == "deselect":
            proc_data, samples = self._parse_form_deselect()
            if proc_data:
                analysis.remove_samples(proc_data=proc_data)
            if samples:
                analysis.remove_samples(samples=samples)
            if not (proc_data or samples):
                searchmsg = "Must select samples to remove from analysis!"

        if action in ("select", "deselect"):
            # rebuild the selected from database to reflect changes
            selproc_data, selsamples = self._selected_parser(analysis)

        availmeta = (SampleTemplate.metadata_headers() +
                     get_table_cols("study"))
        self.render('search_studies.html',
                    user=user,
                    aid=analysis_id,
                    results=results,
                    meta_headers=meta_headers,
                    selsamples=selsamples,
                    selproc_data=selproc_data,
                    counts=counts,
                    fullcounts=fullcounts,
                    searchmsg=searchmsg,
                    query=query,
                    availmeta=availmeta)