def _parse_query(query_str):
    """Run ``query_str`` through the parse -> restructure -> ElasticSearch pipeline.

    The query is parsed with a fresh ``StatefulParser`` using ``parser.Query``
    as the grammar root, the resulting tree is rewritten by a
    ``RestructuringVisitor`` and the rewritten tree is finally handed to an
    ``ElasticSearchVisitor``.

    Args:
        query_str: the query text to parse.

    Returns:
        Whatever the ``ElasticSearchVisitor`` produces for the restructured tree.
    """
    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()
    es_translator = ElasticSearchVisitor()

    # The first element of the parser's result is not needed here.
    _unparsed, tree = query_parser.parse(query_str, parser.Query)
    return tree.accept(restructurer).accept(es_translator)
def test_restructuring_visitor_functionality(query_str, expected_parse_tree):
    """Check that the restructured parse tree of ``query_str`` equals the expected one.

    Args:
        query_str: the query text to parse.
        expected_parse_tree: the tree the RestructuringVisitor should produce.
    """
    print("Parsing: " + query_str)

    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()

    _unparsed, raw_tree = query_parser.parse(query_str, parser.Query)
    actual_tree = raw_tree.accept(restructurer)

    assert actual_tree == expected_parse_tree
def test_parsing_output_with_inspire_next_tests(query_str, expected_parse_tree):
    """Parse and restructure ``query_str`` and compare against the expected tree.

    Args:
        query_str: the query text to parse.
        expected_parse_tree: the tree expected after restructuring.
    """
    print("Parsing: " + query_str)

    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()

    _unparsed, raw_tree = query_parser.parse(query_str, parser.Query)
    actual_tree = raw_tree.accept(restructurer)

    assert actual_tree == expected_parse_tree
def print_query_and_parse_tree(query_str):
    """Print the query followed by its restructured parse tree, with ANSI colours.

    Args:
        query_str: the query text to parse and display.
    """
    query_parser = StatefulParser()

    # Header line: colour 94, the query itself additionally in bold, then reset.
    header = '\033[94m' + "Parsing " + '\033[1m' + query_str + '\033[0m'
    print(header)

    _unparsed, raw_tree = query_parser.parse(query_str, Query)
    restructured_tree = raw_tree.accept(RestructuringVisitor())

    # Tree dump in colour 92, then reset.
    print('\033[92m' + emit_tree_format(restructured_tree) + '\033[0m')

    # Visual separator between consecutive queries.
    print(
        "————————————————————————————————————————————————————————————————————————————————"
    )
def test_that_parse_terminal_token_does_accept_keywords_if_parsing_parenthesized_terminal_flag_is_on():
    """With the parenthesized-terminal flag set, the keyword ``and`` is accepted as a token.

    The whole input must be consumed (empty unrecognised text) and returned
    unchanged as the result.
    """
    keyword_text = 'and'

    stateful_parser = StatefulParser()
    stateful_parser._parsing_parenthesized_terminal = True

    leftover_text, token = SimpleValueUnit.parse_terminal_token(
        stateful_parser, keyword_text)

    assert leftover_text == ''
    assert token == keyword_text
def test_foo_bar():
    """Parse and restructure a fixed ``find j ... and not vol ... and a ...`` query.

    NOTE(review): the expected tree below has no node for the
    ``and not vol A531`` clause of the query -- confirm this is intended.
    """
    query_str = 'find j Nucl.Phys. and not vol A531 and a ellis'
    print("Parsing: " + query_str)

    query_parser = StatefulParser()
    restructurer = RestructuringVisitor()

    _unparsed, raw_tree = query_parser.parse(query_str, parser.Query)
    actual_tree = raw_tree.accept(restructurer)

    journal_clause = KeywordOp(Keyword('journal'), Value('Nucl.Phys.'))
    author_clause = KeywordOp(Keyword('author'), Value('ellis'))
    expected_parse_tree = AndOp(journal_clause, author_clause)

    assert actual_tree == expected_parse_tree
def test_that_parse_terminal_token_does_not_accept_token_followed_by_colon():
    """A token immediately followed by a colon must be rejected.

    The result has to be a ``SyntaxError`` and the complete input must come
    back as unrecognised text.
    """
    token_with_colon = 'title:'
    stateful_parser = StatefulParser()

    leftover_text, outcome = SimpleValueUnit.parse_terminal_token(
        stateful_parser, token_with_colon)

    assert isinstance(outcome, SyntaxError)
    assert leftover_text == token_with_colon
def test_that_parse_terminal_token_does_not_accept_non_shortened_inspire_keywords():
    """The keyword ``exact-author`` must not be accepted as a terminal token.

    The result has to be a ``SyntaxError`` and the complete input must come
    back as unrecognised text.
    """
    keyword_text = "exact-author"
    stateful_parser = StatefulParser()

    leftover_text, outcome = SimpleValueUnit.parse_terminal_token(
        stateful_parser, keyword_text)

    assert isinstance(outcome, SyntaxError)
    assert leftover_text == keyword_text
def test_simple_value_accepted_tokens(query_str, unrecognized_text, result):
    """Check what ``SimpleValue.parse`` accepts and what it leaves unrecognised.

    Args:
        query_str: the text handed to ``SimpleValue.parse``.
        unrecognized_text: the text expected to be left unparsed.
        result: the expected parse result, or a ``SyntaxError`` instance when
            parsing is expected to fail.
    """
    stateful_parser = StatefulParser()
    returned_unrecognised_text, returned_result = SimpleValue.parse(
        stateful_parser, query_str, None)

    # The leftover text is checked the same way for success and failure cases.
    assert returned_unrecognised_text == unrecognized_text

    if isinstance(result, SyntaxError):
        assert isinstance(returned_result, SyntaxError)
        # Compare the actual error message with the expected one; the previous
        # check compared the expected message with itself and could never fail.
        assert returned_result.msg == result.msg
    else:
        assert returned_result == result
def test_parser_functionality(query_str, expected_parse_tree):
    """Check that parsing ``query_str`` (without restructuring) gives the expected tree.

    Args:
        query_str: the query text to parse.
        expected_parse_tree: the tree the parser is expected to return.
    """
    print("Parsing: " + query_str)

    query_parser = StatefulParser()
    _unparsed, actual_tree = query_parser.parse(query_str, Query)

    assert actual_tree == expected_parse_tree
def parse_query(query_str):
    """Drive the whole logic: parse, restructure and generate an ElasticSearch query.

    Args:
        query_str (six.text_type or bytes-like): the query to be translated to
            an ElasticSearch query. Input that is not ``six.text_type`` is
            decoded as UTF-8 first.

    Returns:
        The ElasticSearch query produced by ``ElasticSearchVisitor``, or the
        fallback ``multi_match`` dict described below.

    Notes:
        The fallback is a ``multi_match`` query over ``_all`` whose ``query``
        value is ``query_str`` with colons stripped. It is returned when:

        * the parser recognizes nothing or raises ``SyntaxError``,
        * either visitor raises any exception (search must never fail for the
          user),
        * the generated query is empty.
    """
    def _generate_match_all_fields_query():
        # Strip colon character (special character for ES)
        stripped_query_str = ' '.join(query_str.replace(':', ' ').split())
        return {
            'multi_match': {
                'query': stripped_query_str,
                'fields': ['_all'],
                'zero_terms_query': 'all'
            }
        }

    def _visitor_crash_message(visitor_cls, error):
        # Build "<Visitor> crashed: <details>." or just "<Visitor> crashed."
        # The conditional is evaluated on the suffix only, so the visitor name
        # is always part of the message (an unparenthesized
        # `a + b if cond else c` would drop it when the details are empty).
        error_text = six.text_type(error)
        suffix = ": " + error_text + "." if error_text else "."
        return visitor_cls.__name__ + " crashed" + suffix

    if not isinstance(query_str, six.text_type):
        query_str = six.text_type(query_str.decode('utf-8'))

    logger.info('Parsing: "' + query_str + '\".')

    parser = StatefulParser()
    rst_visitor = RestructuringVisitor()
    es_visitor = ElasticSearchVisitor()

    try:
        unrecognized_text, parse_tree = parser.parse(query_str, Query)

        if unrecognized_text:  # Usually, should never happen.
            msg = 'Parser returned unrecognized text: "' + unrecognized_text + \
                  '" for query: "' + query_str + '".'

            if query_str == unrecognized_text and parse_tree is None:
                # Didn't recognize anything.
                logger.warning(msg)
                return _generate_match_all_fields_query()

            msg += ' Continuing with recognized parse tree.'
            logger.warning(msg)

    except SyntaxError as e:
        logger.warning('Parser syntax error (' + six.text_type(e) + ') with query: "' + query_str +
                       '". Continuing with a match_all with the given query.')
        return _generate_match_all_fields_query()

    # Try-Catch-all exceptions for visitors, so that search functionality never fails for the user.
    try:
        restructured_parse_tree = parse_tree.accept(rst_visitor)
        logger.debug('Parse tree: \n' + emit_tree_format(restructured_parse_tree))
    except Exception as e:
        logger.exception(_visitor_crash_message(RestructuringVisitor, e))
        return _generate_match_all_fields_query()

    try:
        es_query = restructured_parse_tree.accept(es_visitor)
    except Exception as e:
        logger.exception(_visitor_crash_message(ElasticSearchVisitor, e))
        return _generate_match_all_fields_query()

    if not es_query:
        # Case where an empty query was generated (i.e. date query with malformed date, e.g. "d < 200").
        return _generate_match_all_fields_query()

    return es_query