Example 1
    def get(self, term):
        """
        Returns list of matching concepts or entities using lexical search
        """
        args = simple_parser.parse_args()
        q = GolrSearchQuery(term, user_agent=USER_AGENT, **args)
        results = q.search()
        return results
Example 2
    def get(self, term):
        """
        Returns list of matching concepts or entities using lexical search
        """
        args = simple_parser.parse_args()
        q = GolrSearchQuery(term, **args)
        results = q.autocomplete()
        return results
Example 3
    def get(self, term):
        """
        Returns list of matching concepts or entities using lexical search
        """
        args = simple_parser.parse_args()
        # GolrSearchQuery takes structured filters as a dict via fq (see the
        # fq={'id': ...} examples below); the raw "field:value" strings parsed
        # from the request are passed through fq_string instead.
        args['fq_string'] = copy.copy(args['fq'])
        args['fq'] = {}
        q = GolrSearchQuery(term, user_agent=USER_AGENT, **args)
        results = q.autocomplete()
        return results
Example 4
    def setup_class(self):
        self.manager = GolrSearchQuery()

        # Mock the PySolr search function to
        # return our test docs
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/solr-docs.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)
Example 5
def test_search_go_ontol():
    q = GolrSearchQuery("transport*", category='ontology_class', is_go=True)
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
Example 6
def test_search():
    q = GolrSearchQuery("abnormal")
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
Example 7
def test_search_go_all():
    q = GolrSearchQuery("transport*", is_go=True)
    print("Q={}".format(q))
    params = q.solr_params()
    print("PARAMS={}".format(params))
    results = q.exec()
    print("RESULTS={}".format(results))
    docs = results['docs']
    for r in docs:
        print(str(r))
    assert len(docs) > 0
    print(str(results['facet_counts']))
Example 8
def clean_feature_ids(id: str) -> str:
    """
    MME queries often need to be sanitized before going into owlsim, for example:
    MIM:610536 -> OMIM:610536
    SHH -> HGNC:10848
    """
    if ':' in id:
        prefix, reference = id.split(':')
        if prefix == 'MIM':
            id = 'OMIM:' + reference
    else:
        # Assume it's a label and look it up
        # Assume it's human, and make sure it's an exact match
        query = GolrSearchQuery(id, taxon=['NCBITaxon:9606'], min_match="100%")
        results = query.search()
        if results.docs:
            id = results.docs[0]['id']

    return id
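
For reference, the docstring's two cases map onto the two branches above (illustrative sketch; the label branch needs a live Solr backend):

# prefix rewriting needs no Solr round trip
assert clean_feature_ids('MIM:610536') == 'OMIM:610536'
# a bare label such as 'SHH' triggers an exact-match GolrSearchQuery lookup
# restricted to human (NCBITaxon:9606), returning the first hit's id
# (HGNC:10848 per the docstring)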
Example 9
def get_concepts():
    keywords = request.args.get('keywords', None)
    semanticGroups = request.args.get('semanticGroups', None)
    pageSize = int(request.args.get('pageSize', 1))
    pageNumber = int(request.args.get('pageNumber', 1))

    validatePagination(pageSize, pageNumber)
    validateKeywords(keywords)

    q = GolrSearchQuery(term=keywords,
                        category=build_categories(semanticGroups),
                        rows=pageSize,
                        start=getStartIndex(pageNumber, pageSize))

    results = q.exec()

    concepts = []
    for d in results['docs']:
        concept = parse_concept(d)
        concepts.append(concept)

    return jsonify(concepts)
Example 10
def get_concept(conceptId):

    if conceptId.startswith("biolink"):
        conceptId = objectId(conceptId)

    results = GolrSearchQuery(term=conceptId,
                              fq={
                                  'id': conceptId
                              },
                              rows=1,
                              hl=False).exec()

    c = None
    for d in results['docs']:
        c = parse_concept(d)
        break

    return c
Example 11
def find_exactmatches(conceptId):
    """
    Returns a list of concept IDs that are exact matches for the given conceptId
    """
    results = GolrSearchQuery(term=conceptId,
                              fq={
                                  'id': conceptId
                              },
                              rows=1,
                              hl=False).exec()

    docs = results['docs']

    for d in docs:
        if get_concept_property(d, 'id') == conceptId:
            exactmatches = get_concept_property(d, 'equivalent_curie')
            if exactmatches is None:
                exactmatches = []  # the property may be absent
            exactmatches.append(conceptId)
            return exactmatches
    return []
Example 12
def get_concept_details(conceptId):

    if conceptId.startswith("biolink"):
        conceptId = objectId(conceptId)

    results = GolrSearchQuery(term=conceptId,
                              fq={
                                  'id': conceptId
                              },
                              rows=1,
                              hl=False).exec()

    entries = []
    for d in results['docs']:

        c = parse_concept(d)

        details = {}
        details['iri'] = get_concept_property(d, 'iri')
        details['taxon'] = get_concept_property(d, 'taxon')
        details['taxon_label'] = get_concept_property(d, 'taxon_label')
        details['taxon_label_synonym'] = get_concept_property(
            d, 'taxon_label_synonym')

        if details['taxon_label_synonym'] is not None:
            details['taxon_label_synonym'] = ', '.join(
                details['taxon_label_synonym'])

        c['details'] = [{
            'tag': k,
            'value': v
        } for k, v in details.items() if v is not None]

        entries += [c]

    return jsonify(entries)
Example 13
def test_cursor():
    """
    Tests rows and start parameters.

    First fetch 100 docs, then run the same query again, paging through it in increments of ten.

    The two sets of IDs returned should be identical.
    """
    q = GolrSearchQuery("abnormal", rows=100)
    results = q.exec()
    docs = results['docs']
    ids = set([d['id'] for d in docs])
    print('Init ids={}'.format(ids))
    assert len(ids) == 100
    matches = set()
    for i in range(0,10):
        q = GolrSearchQuery("abnormal", start=i*10, rows=10)
        docs = q.exec()['docs']
        next_ids = [d['id'] for d in docs]
        assert len(next_ids) == 10
        print('Next ids (from {}) = {}'.format(i*10, next_ids))
        matches.update(next_ids)
    assert len(matches) == 100
    assert len(matches.intersection(ids)) == 100
Example 14
def main():
    """
    Wrapper for OGR
    """

    parser = argparse.ArgumentParser(
        description='Command line interface to python-ontobio.golr library'
        """

        Provides command line interface onto the ontobio.golr python library, a high level
        abstraction layer over Monarch and GO solr indices.
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-A',
                        '--associations',
                        dest='associations',
                        action='store_true',
                        default=False,
                        help='Query associations rather than entities/documents')
    parser.add_argument('-s',
                        '--settings',
                        type=str,
                        help='Path to config file')
    parser.add_argument('-o',
                        '--outfile',
                        type=str,
                        required=False,
                        help='Path to output file')
    parser.add_argument('-f',
                        '--facets',
                        type=str,
                        required=False,
                        help='Facet fields: comma-delimited')
    parser.add_argument('-q',
                        '--fq',
                        type=json.loads,
                        default={},
                        required=False,
                        help='Facet query (solr fq) - should be json')
    parser.add_argument(
        '-Q',
        '--qargs',
        type=json.loads,
        default={},
        required=False,
        help='Query to be passed directly to python golr_associations query')
    parser.add_argument('-l',
                        '--legacy_solr',
                        dest='legacy_solr',
                        action='store_true',
                        default=False,
                        help='Set for legacy solr schema (solr3 golr)')
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        help='Solr URL. E.g. http://localhost:8983/solr/golr')
    parser.add_argument('-v',
                        '--verbosity',
                        default=0,
                        action='count',
                        help='Increase output verbosity')

    parser.add_argument('search', type=str, help='Search terms')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    facets = []
    if args.facets is not None:
        facets = args.facets.split(",")

    config = None
    if args.settings is not None:
        from ontobio.config import load_config
        config = load_config(args.settings)
    results = None

    if args.associations:
        q = None
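        # a search term of '%' is treated as "no query term" (q stays None)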
        if args.search != '%':
            q = args.search

        q = GolrAssociationQuery(q=q,
                                 is_go=args.legacy_solr,
                                 fq=args.fq,
                                 facet_fields=facets,
                                 url=args.url)

        results = q.exec()
        #print("RESULTS={}".format(results))
        docs = results['associations']
        print("RESULTS: {}".format(len(docs)))
        for r in docs:
            print(str(r))
    else:
        logging.info("FQ={}".format(args.fq))
        q = GolrSearchQuery(args.search,
                            is_go=args.legacy_solr,
                            fq=args.fq,
                            facet_fields=facets,
                            url=args.url)

        results = q.exec()
        #print("RESULTS={}".format(results))
        docs = results['docs']
        print("RESULTS: {}".format(len(docs)))
        for r in docs:
            print(" {} '{}' {} // {}".format(r['id'], r['label'], r['score'],
                                             r['category']))

    if len(facets) > 0:
        #from collections import OrderedDict
        fcs = results['facet_counts']
        for f in facets:
            d = fcs[f]
            print(str(d))
            print("## FACET: {}".format(f))
            for k, v in sorted(d.items(), key=lambda t: -t[1]):
                print("  {:5d}: {}".format(v, k))
Example 15
def test_solr_404():
    q = GolrSearchQuery("abnormal")
    q.update_solr_url("https://httpbin.org/status/404")
    pytest.raises(pysolr.SolrError, q.search)
Example 16
class TestGolrSearchQuery():

    @classmethod
    def setup_class(self):
        self.manager = GolrSearchQuery()

        # Mock the PySolr search function to
        # return our test docs
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/solr-docs.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)

    @classmethod
    def teardown_class(self):
        self.manager = None

    def test_longest_hl(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting"
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_longest_hl_ambiguous(self):
        test_data = [
            "<em>Muscle</em> <em>atrophy</em>, generalized",
            "Generalized <em>muscle</em> degeneration",
            "Diffuse skeletal <em>muscle</em> wasting",
            "<em>Muscle</em> <em>atrophy</em>, not generalized",
        ]
        expected = "<em>Muscle</em> <em>atrophy</em>, generalized"
        results = self.manager._get_longest_hl(test_data)
        assert expected == results

    def test_hl_to_string(self):
        test_data = "Foo <em>Muscle</em> bar <em>atrophy</em>, generalized"
        expected = "Foo Muscle bar atrophy, generalized"
        results = self.manager._hl_as_string(test_data)
        assert expected == results

    def test_invalid_xml(self):
        test_data = "Foo<Foo> <em>Muscle</em> bar <em>atrophy</em>, generalized"
        pytest.raises(ET.ParseError, self.manager._hl_as_string, test_data)

    def test_autocomplete_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_autocomplete_results returns the
        expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager._process_autocomplete_results(self.test_results)

        assert json.dumps(processed_docs, sort_keys=True) == \
               json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True)

    def test_search_doc_conversion(self):
        """
        Given a sample solr output as a pysolr.Results object
        test that _process_search_results returns the
        expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))

        output_docs = self.manager._process_search_results(self.test_results)

        assert json.dumps(processed_docs, sort_keys=True) == \
               json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True)

    def test_search(self):
        """
        Given a mock PySolr.search method test that
        search() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/search-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.search()

        assert json.dumps(processed_docs, sort_keys=True) == \
               json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True)

    def test_autocomplete(self):
        """
        Given a mock PySolr.search method test that
        autocomplete() returns the expected object
        """
        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-expected.json')
        processed_docs = json.load(open(expected_fh))
        output_docs = self.manager.autocomplete()

        assert json.dumps(processed_docs, sort_keys=True) == \
               json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True)

    def test_autocomplete_no_category(self):
        """
        Test for document without a category
        """
        # Provide a new mock file
        input_fh = os.path.join(os.path.dirname(__file__),
                                'resources/solr/autocomplete-nocat.json')
        input_docs = json.load(open(input_fh))
        self.test_results = pysolr.Results(input_docs)
        self.manager.solr.search = MagicMock(return_value=self.test_results)

        expected_fh = os.path.join(os.path.dirname(__file__),
                                   'resources/solr/autocomplete-nocat-expect.json')
        processed_docs = json.load(open(expected_fh))

        output_docs = self.manager.autocomplete()
        print(json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True))

        assert json.dumps(processed_docs, sort_keys=True) == \
               json.dumps(output_docs,
                          default=lambda obj: getattr(obj, '__dict__', str(obj)),
                          sort_keys=True)
Example 17
def search(term, args):
    q = GolrSearchQuery(term, args)
    return q.search()
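
Taken together, the examples follow one pattern: construct a GolrSearchQuery with a free-text term plus optional keyword filters (category, fq, rows, start, taxon, ...), then call search(), autocomplete(), or exec(). A minimal sketch of that pattern, assuming the ontobio import path and a reachable Golr/Solr backend (the term and filter values are illustrative):

from ontobio.golr.golr_query import GolrSearchQuery

# build the query: a free-text term plus optional keyword filters
q = GolrSearchQuery("abnormal", category='ontology_class', rows=10)

# exec() returns a dict with 'docs' (and 'facet_counts' when facet fields
# are requested); search() and autocomplete() return post-processed results
results = q.exec()
for doc in results['docs']:
    print(doc['id'], doc.get('label'))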