예제 #1
0
class SpiresOrQuery(UnaryRule):
    """SPIRES-style ``or`` clause: the keyword followed by one operand.

    After the case-insensitive ``or`` keyword, the listed alternatives are
    candidate operands; whichever matches is stored in ``op``.
    """

    grammar = (
        omit(re.compile(r"or", re.IGNORECASE)),
        [
            (omit(Whitespace), attr('op', SpiresSimpleQuery)),
            (omit(_), attr('op', SpiresParenthesizedQuery)),
            (omit(Whitespace), attr('op', SpiresValueQuery)),
            # Fallback: drop whatever follows and keep an empty query as operand.
            (
                omit(re.compile(r".*", re.IGNORECASE)),
                attr('op', EmptyQueryRule),
            ),
        ],
    )
예제 #2
0
class BetweenDateExpression(Expression):
    """Date range: the 'between' symbol, a date, the 'and' symbol, a date.

    The two dates are stored in ``min`` and ``max``.
    """

    grammar = (
        omit(symbols['between']),
        attr('min', Date),
        omit(symbols['and']),
        attr('max', Date),
    )

    def build(self, builder):
        """Add an inclusive date-range condition, passing both bounds as arguments."""
        condition = '''(date >= ? AND date <= ?)'''
        builder.add(condition, self.min, self.max)
예제 #3
0
class LessEqualOp(UnaryRule):
    """Less-than-or-equal operator rule.

    Two spellings are accepted: an explicit ``<=`` prefix, as in
    ``date <= 10-2000``, and a trailing ``-`` suffix, as in
    ``author-count 100-``.
    """

    grammar = [
        # Prefix form: "<=" followed by a simple value.
        (
            omit(Literal("<=")),
            attr('op', SimpleValue),
        ),
        # Suffix form: digits, optionally in groups joined by "/" or "-",
        # closed by a "-" that must itself be followed by whitespace, ")" or
        # the end of input, so that a value such as "1-e" is rejected.
        (
            attr('op', re.compile(r"\d+([/-]\d+)*(?=-)")),
            omit(re.compile(r'-(?=\s|\)|$)')),
        ),
    ]
예제 #4
0
class SpiresNotQuery(UnaryRule):
    """SPIRES-style negation clause: ``and not`` / ``not`` followed by one operand.

    Both keyword spellings are case-insensitive and omitted from the result;
    the operand is stored in ``op``.
    """

    grammar = (
        # The longer "and not" spelling is listed ahead of plain "not".
        [
            omit(re.compile(r"and\s+not", re.IGNORECASE)),
            omit(re.compile(r"not", re.IGNORECASE)),
        ],
        # Candidate operands.
        [
            (
                omit(Whitespace),
                attr('op', SpiresSimpleQuery),
            ),
            (
                omit(_),
                attr('op', SpiresParenthesizedQuery),
            ),
            (
                omit(Whitespace),
                attr('op', SpiresValueQuery),
            ),
        ],
    )
예제 #5
0
class Value(UnaryRule):
    """Umbrella rule for every kind of value phrase.

    Whichever of the listed rules matches is wrapped and stored in ``op``.
    """

    grammar = attr(
        'op',
        [
            # A range, optionally introduced by "=".
            (optional(omit(Literal("="))), RangeOp),
            # Comparison operators.
            GreaterEqualOp,
            LessEqualOp,
            GreaterThanOp,
            LessThanOp,
            # Any remaining value kind, optionally introduced by "=".
            (
                optional(omit(Literal("="))),
                [
                    ComplexValue,
                    ParenthesizedSimpleValues,
                    SimpleValueBooleanQuery,
                    SimpleValue,
                ],
            ),
        ],
    )
class LessEqualOp(UnaryRule):
    """Less than or Equal to operator.

    Supports queries like date <= 10-2000 or author-count 100-.

    Three spellings are listed: an explicit ``<=`` prefix, a run of digits
    with a trailing ``-``, and any token free of whitespace, parentheses and
    colons with a trailing ``-``.
    """
    grammar = [
        # Prefix form: "<=" followed by a simple value.
        (omit(Literal("<=")), attr('op', SimpleValue)),
        # Suffix form, digits only: the closing "-" must be followed by
        # whitespace, ")" or the end of input, so that a value such as "1-e"
        # is not accepted.
        (attr('op', re.compile(r"\d+")), omit(re.compile(r'-(?=\s|\)|$)'))),
        # Suffix form, general token: anything without whitespace, "(", ")" or
        # ":". The lookahead on the value only succeeds when a "-" (optionally
        # after one space) comes next, so the omitted terminator has to be "-"
        # as well; a "\+" terminator here could never match what the lookahead
        # has just required.
        (attr('op', re.compile(r"[^\s():]+(?=( -|-))")),
         omit(re.compile(r'-(?=\s|\)|$)'))),
    ]
예제 #7
0
class Main(UnaryRule):
    """Top-level rule; builds the valid-keywords grammar on first instantiation."""

    # Class-wide flag flipped to True once the keywords grammar has been built.
    initialized = False

    grammar = [
        (omit(_), attr('op', Query), omit(_)),
        attr('op', EmptyQueryRule),
    ]

    def __init__(self):
        """Build the allowed-keywords grammar once, on the first call only."""
        if Main.initialized:
            return

        # Imported inside the method rather than at module level.
        from invenio_query_parser.utils import build_valid_keywords_grammar
        build_valid_keywords_grammar()
        Main.initialized = True
예제 #8
0
class OrQuery(UnaryRule):
    """Disjunction clause: an ``Or`` keyword or a ``|`` sign, then the operand.

    The operand is stored in ``op``.
    """

    grammar = [
        # Keyword form: the operand is a simple query after whitespace, or a
        # parenthesized query.
        (omit(Or), [
            (omit(Whitespace), attr('op', SimpleQuery)),
            (omit(_), attr('op', ParenthesizedQuery)),
        ]),
        # Symbol form: "|" followed by a simple query.
        (omit(Literal('|')), attr('op', SimpleQuery)),
    ]
예제 #9
0
class AndQuery(UnaryRule):
    """Conjunction clause: an ``And`` keyword or a ``+`` sign, then the operand.

    The operand is stored in ``op``.
    """

    grammar = [
        # Keyword form: the operand is a negation or a simple query after
        # whitespace, or a parenthesized query.
        (omit(And), [
            (omit(Whitespace), attr('op', NotQuery)),
            (omit(Whitespace), attr('op', SimpleQuery)),
            (omit(_), attr('op', ParenthesizedQuery)),
        ]),
        # Symbol form: "+" followed by a simple query.
        (omit(Literal('+')), attr('op', SimpleQuery)),
    ]
예제 #10
0
class NotExpression(Expression, List):
    """Negation: the 'not' symbol (omitted) followed by ``expressions``."""

    grammar = (omit(symbols['not']), expressions)

    def build(self, builder):
        """Emit the first child's output wrapped in ``NOT ( ... )``."""
        builder.add('NOT (')
        operand = self[0]
        operand.build(builder)
        builder.add(')')
예제 #11
0
class LessThanOp(UnaryRule):
    """Strictly-less-than operator.

    Handles queries such as ``author-count < 100`` or ``date before 1984``;
    the keyword is matched case-insensitively.
    """

    grammar = (
        omit(re.compile(r"before|<", re.I)),
        attr('op', SimpleValue),
    )
예제 #12
0
class GreaterThanOp(UnaryRule):
    """Strictly-greater-than operator.

    Handles queries such as ``author-count > 2000`` or ``date after 10-2000``;
    the keyword is matched case-insensitively.
    """

    grammar = (
        omit(re.compile(r"after|>", re.I)),
        attr('op', SimpleValue),
    )
예제 #13
0
class Main(UnaryRule):
    """Top-level rule; builds the valid-keywords grammar on first instantiation.

    The allowed keywords are read from the Flask application config key
    ``SEARCH_ALLOWED_KEYWORDS`` (an empty list when the key is absent).
    """

    # Class-wide flag flipped to True once the keywords grammar has been built.
    initialized = False

    grammar = [
        (omit(_), attr('op', [FindQuery, Query]), omit(_)),
        attr('op', EmptyQueryRule),
    ]

    def __init__(self):
        """Build the allowed-keywords grammar once, on the first call only."""
        if Main.initialized:
            return

        # Imported inside the method rather than at module level.
        from invenio_query_parser.utils import build_valid_keywords_grammar
        from flask import current_app

        allowed_keywords = current_app.config.get('SEARCH_ALLOWED_KEYWORDS', [])
        build_valid_keywords_grammar(allowed_keywords)
        Main.initialized = True
예제 #14
0
class ParenthesizedSimpleValues(UnaryRule):
    """Simple values, possibly combined with boolean operators, inside parentheses."""

    grammar = (
        omit(Literal("(")),
        [SimpleValueBooleanQuery, SimpleValueNegation, SimpleValue],
        omit(Literal(")")),
    )

    @classmethod
    def parse(cls, parser, text, pos):
        """Parse the grammar with the parser's parenthesized-values flag raised.

        Returns the ``(remaining_text, tokens)`` pair from ``parser.parse`` on
        success, or ``(text, error)`` when a ``SyntaxError`` is raised. In both
        cases the flag is set back to ``False`` before returning.
        """
        try:
            parser._parsing_parenthesized_simple_values_expression = True
            rest, tokens = parser.parse(text, cls.grammar)
        except SyntaxError as error:
            return text, error
        else:
            return rest, tokens
        finally:
            parser._parsing_parenthesized_simple_values_expression = False
예제 #15
0
class EmptyQuery(LeafRule):
    """Leaf rule whose grammar is optional, omitted whitespace; its value is ``None``."""

    grammar = omit(optional(whitespace))

    def __init__(self):
        """Create the node with no value."""
        self.value = None

    def __repr__(self):
        """Return the class name followed by empty parentheses."""
        return '{}()'.format(self.__class__.__name__)
예제 #16
0
    def parse(cls, parser, text, pos):
        """Parse ``left <boolean-op> right`` where the right side must be a simple value.

        The three parts are parsed with ``cls.grammar[0]``, ``cls.grammar[1]`` and
        ``cls.grammar[2]`` respectively. When the operator part yields nothing, an
        implicit AND is used.

        Before the right operand is accepted, a look-ahead parse checks whether the
        text would match a more specific rule (keyword query, range, comparison or
        complex value). If it does, this rule gives up; only when that look-ahead
        raises ``SyntaxError`` is the right operand parsed with ``cls.grammar[2]``.

        ``pos`` is not used here. ``cls`` is presumably bound by a ``@classmethod``
        decorator outside this view.

        Returns:
            ``(remaining_text, SimpleValueBooleanQuery(...))`` on success, otherwise
            ``(text, SyntaxError)`` with the original text.
        """
        # Both stay None until their part has been parsed; the except-branch below
        # uses them to tell how far parsing got before a SyntaxError was raised.
        left_operand, operator = None, None
        try:
            # Parse left operand
            text_after_left_op, left_operand = parser.parse(text, cls.grammar[0])

            # Parse boolean operators
            text_after_bool_op, operator = parser.parse(text_after_left_op, cls.grammar[1])
            if not operator:  # Implicit AND at terminals level
                operator = And(BooleanOperator.AND)

            # Parse right operand.
            # We don't want to eagerly recognize anything else other than a SimpleValue.
            # So we attempt to recognize the more specific rules, and if we do, then we need to stop identifying this
            # rule.
            # The result of this look-ahead is discarded; only whether it raises matters.
            parser.parse(
                text_after_bool_op,
                [
                    (
                        omit(optional(Not)),
                        [
                            InvenioKeywordQuery,
                            SpiresKeywordQuery,
                        ]
                     ),
                    [
                        RangeOp,
                        GreaterEqualOp,
                        LessEqualOp,
                        GreaterThanOp,
                        LessThanOp,
                        ComplexValue
                    ]
                ]
            )

            # Identified something other than a SimpleValue, stop parsing this rule.
            result = text, SyntaxError("expected simple value related rule as right operand of a " +
                                       cls.__name__)

        except SyntaxError as e:
            # Default outcome: report the failure with the input text unchanged.
            result = text, e

            # Reached only when the look-ahead (not an earlier step) failed, i.e.
            # the right-hand side is not one of the more specific rules.
            if left_operand and operator:
                # Attempt to parse a right operand
                try:
                    remaining_text, right_operand = parser.parse(text_after_bool_op, cls.grammar[2])
                    result = remaining_text, SimpleValueBooleanQuery(
                        left_operand,
                        bool_op=operator,
                        right=right_operand
                    )
                except SyntaxError as e:  # Actual failure of parsing boolean query at terminals level
                    return text, e

        return result
예제 #17
0
class Query(ListRule):
    """Entry point of the grammar.

    A leading ``find`` keyword (or its abbreviations ``fin``, ``fi``, ``f``) is
    accepted and dropped: the grammar augments both SPIRES and Invenio style
    syntaxes, and the keyword only exists for backward compatibility with
    SPIRES.
    """

    grammar = [
        # Optional find keyword, then a statement and any trailing malformed words.
        (
            omit(optional(re.compile(r"(find|fin|fi|f)\s", re.I))),
            (Statement, maybe_some(MalformedQueryWords)),
        ),
        # Nothing but malformed words.
        MalformedQueryWords,
        # Nothing at all.
        EmptyQuery,
    ]
예제 #18
0
class RangeOp(BinaryRule):
    """Range operator whose two ends may be values of different kinds.

    Examples:
        muon decay year:1983->1992
        author:"Ellis, J"->"Ellis, Qqq"
        author:"Ellis, J"->Ellis, M

    Ends of non-matching kinds are dealt with in a later phase.
    """

    grammar = (
        attr('left', [ComplexValue, SimpleRangeValue]),
        omit(Literal("->")),
        attr('right', [ComplexValue, SimpleRangeValue]),
    )
예제 #19
0
class InvenioKeywordQuery(BinaryRule):
    """Keyword query using a colon separator (Invenio style).

    Invenio and SPIRES keyword queries are kept apart so that, for the Invenio
    form, the parser can accept any terminal as the keyword.

    Examples: ``author: ellis``, ``title: boson``, ``unknown_keyword: foo``.

    Note:
        ``arxiv:arxiv_identifier`` is excluded from the generic keyword
        pattern. It is a special case of SimpleValue, because it contains ":".
    """

    grammar = (
        # A known keyword, or any run without whitespace or ":" that does not
        # start with "arxiv".
        attr('left', [InspireKeyword, re.compile(r"(?!arxiv)[^\s:]+")]),
        omit(':'),
        attr('right', Value),
    )
예제 #20
0
class RelativeDateExpression(Expression):
    """Relative date: the 'since' symbol, a number (``length``) and a time unit (``unit``)."""

    grammar = (
        omit(symbols['since']),
        attr('length', number),
        attr('unit', TimeUnit),
    )

    def build(self, builder):
        """Add a ``date >= DATE('now', '<-length> <unit>')`` condition.

        The length is negated so the offset points into the past. A unit of
        ``'week'`` is rewritten as seven times as many days.
        """
        period = self.unit
        offset = -int(self.length)

        if period == 'week':
            offset, period = offset * 7, 'day'

        # NOTE(review): offset and unit are formatted into the SQL text rather
        # than passed as arguments; confirm that is intended.
        condition = '''date >= DATE('now', '{} {}')'''.format(offset, period)
        builder.add(condition)
    def parse(cls, parser, text, pos):
        """Parse ``left <boolean-op> right`` among terminals, refusing keyword queries.

        The three parts are parsed with ``cls.grammar[0]``, ``cls.grammar[1]`` and
        ``cls.grammar[2]`` respectively. Before the right operand is accepted, a
        look-ahead parse checks whether the text is an (optionally negated) keyword
        query. If it is, this rule gives up; only when that look-ahead raises
        ``SyntaxError``, and both a left operand and an operator were obtained, is
        the right operand parsed with ``cls.grammar[2]``.

        ``pos`` is not used here. ``cls`` is presumably bound by a ``@classmethod``
        decorator outside this view.

        Returns:
            ``(remaining_text, SimpleValueBooleanQuery([left, op, right]))`` on
            success, otherwise ``(text, SyntaxError)`` with the original text.
        """
        # Both stay None until their part has been parsed; the except-branch below
        # uses them to tell how far parsing got before a SyntaxError was raised.
        left_operand, operator = None, None
        try:
            # Parse left operand
            text_after_left_op, left_operand = parser.parse(
                text, cls.grammar[0])

            # Parse boolean operators
            text_after_bool_op, operator = parser.parse(
                text_after_left_op, cls.grammar[1])

            # Parse right operand.
            # We don't want to eagerly recognize keyword queries as SimpleValues.
            # So we attempt to firstly recognize the more specific rules (keyword queries and their negation), and
            # then a SimpleValue.
            # The look-ahead result is discarded (bound to a local ``_``); only
            # whether it raises matters.
            _ = parser.parse(text_after_bool_op, (omit(
                optional(Not)), [InvenioKeywordQuery, SpiresKeywordQuery]))

            # Keyword query parsing succeeded, stop boolean_op among terminals recognition.
            result = text, SyntaxError(
                "found keyword query at terminals level")

        except SyntaxError as e:
            # Default outcome: report the failure with the input text unchanged.
            result = text, e

            if left_operand and operator:
                # Attempt to parse a right operand
                try:
                    t, right_operand = parser.parse(text_after_bool_op,
                                                    cls.grammar[2])
                    result = t, SimpleValueBooleanQuery(
                        [left_operand, operator, right_operand])
                except SyntaxError as e:  # Actual failure of parsing boolean query at terminals level
                    return text, e

        return result
예제 #22
0
class LowerQuery(UnaryRule):
    """Less-than clause: ``<`` or a case-insensitive ``before``, then a SPIRES value.

    The value is stored in ``op``.
    """

    grammar = (
        omit([Literal('<'), re.compile('before', re.IGNORECASE)], _),
        attr('op', SpiresValue),
    )
예제 #23
0
        omit(re.compile(r"or", re.I)),
        [
            (omit(Whitespace), attr('op', SpiresSimpleQuery)),
            (omit(_), attr('op', SpiresParenthesizedQuery)),
                (omit(Whitespace), attr('op', SpiresValueQuery)),
        ]
    )


# SpiresQuery's grammar: a leading parenthesized or simple query, followed by
# any number of not/and/or clauses. Everything is stored in ``children``.
SpiresQuery.grammar = attr(
    'children',
    (
        [SpiresParenthesizedQuery, SpiresSimpleQuery],
        maybe_some((
            omit(_),
            [SpiresNotQuery, SpiresAndQuery, SpiresOrQuery],
        )),
    ),
)


class NestableKeyword(LeafRule):
    """Leaf rule matching ``refersto`` or ``citedby``, case-insensitively.

    The matched text is stored in ``value``.
    """

    grammar = attr(
        'value',
        [
            re.compile('refersto', re.IGNORECASE),
            re.compile('citedby', re.IGNORECASE),
        ],
    )
예제 #24
0
 def runTest(self):
     """Parsing a word through an ``omit`` grammar is expected to give ``None``."""
     omitted_word = pypeg2.omit(pypeg2.word)
     parsed = pypeg2.parse("hello", omitted_word)
     self.assertEqual(parsed, None)
예제 #25
0
 def runTest(self):
     """Composing a word through an ``omit`` grammar is expected to give an empty string."""
     omitted_word = pypeg2.omit(pypeg2.word)
     composed = pypeg2.compose('hello', omitted_word)
     self.assertEqual(composed, "")
예제 #26
0
    ParenthesizedQuery,
    SimpleQuery,
])


# Nested keyword query: one of the citation keywords, an optional ":" and an
# expression. All keyword patterns are case-insensitive.
NestedKeywordQuery.grammar = (
    attr('left', [
        # The most specific (longest) pattern has to be listed first.
        re.compile(r'citedbyexcludingselfcites', re.I),
        re.compile(r'citedbyx', re.I),
        re.compile(r'citedby', re.I),
        re.compile(r'referstoexcludingselfcites', re.I),
        re.compile(r'referstox', re.I),
        re.compile(r'refersto', re.I),
    ]),
    optional(omit(":")),
    attr('right', Expression),
)


class BooleanQuery(BooleanRule):
    """Boolean query expressed as a binary rule.

    The grammar is an expression, then ``And``, ``Or`` or no operator at all
    (the ``None`` alternative), then a statement.
    """

    grammar = (Expression, [And, Or, None], Statement)


# ########################

# #### Main productions ####
# A statement is either a boolean query or a single expression, stored in ``op``.
Statement.grammar = attr('op', [BooleanQuery, Expression])
예제 #27
0
class SimpleValueNegation(UnaryRule):
    """Negation accepting only SimpleValues."""
    grammar = omit(Not), attr('op', SimpleValue)
예제 #28
0
 class Rule(object):
     """Keep a leading SPIRES simple value in ``value`` and drop what follows it."""

     grammar = (
         attr('value', SpiresSimpleValue),
         omit(re.compile(".*")),
     )
예제 #29
0

class ValueQuery(UnaryRule):
    """Query made of a bare value, stored in ``op``."""

    grammar = attr(
        "op",
        Value,
    )


class Query(ListRule):
    """List rule declared with an empty body.

    No grammar is set here; presumably it is attached later in the module,
    which lets other rules refer to ``Query`` before then.
    """


class KeywordQuery(BinaryRule):
    """Binary rule declared with an empty body.

    Its grammar is assigned after the class statement because the grammar
    refers back to ``KeywordQuery`` itself.
    """


# Every alternative has the shape ``keyword : right``; they differ only in what
# is accepted on the right-hand side.
KeywordQuery.grammar = [
    # Right side is another keyword query.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", KeywordQuery),
    ),
    # Right side is a value.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", Value),
    ),
    # Right side is a full query.
    (
        attr("left", KeywordRule),
        omit(_, Literal(":"), _),
        attr("right", Query),
    ),
]


class SimpleQuery(UnaryRule):
    """Either a keyword query or a value query, stored in ``op``."""

    grammar = attr(
        "op",
        [KeywordQuery, ValueQuery],
    )


class ParenthesizedQuery(UnaryRule):
    """A query wrapped in parentheses; the inner query is stored in ``op``."""

    grammar = (
        omit(Literal("("), _),
        attr("op", Query),
        omit(_, Literal(")")),
    )


class NotQuery(UnaryRule):
    grammar = [
예제 #30
0
class FindQuery(UnaryRule):
    """SPIRES query introduced by ``Find`` and whitespace, both omitted.

    The SPIRES query itself is stored in ``op``.
    """

    grammar = (
        omit(Find, Whitespace),
        attr('op', SpiresQuery),
    )
                              "|".join([x + ":"
                                        for x in SPIRES_KEYWORDS.keys()])))


class KeywordQuery(BinaryRule):
    """Binary rule declared with an empty body.

    Its grammar is assigned after the class statement, further down the module.
    """


class EmptyQueryRule(LeafRule):
    """Leaf rule matching a possibly empty run of whitespace, stored in ``value``."""

    grammar = attr(
        'value',
        re.compile(r'\s*'),
    )


KeywordQuery.grammar = [
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        # FIXME: This should be replaced with KeywordQuery to restore
        # intended functionality.
        # Also NestedKeywordsRule class should be removed from
        # this file, ./ast.py and ./walkers/pypeg_to_ast.py.
        attr('right', NestedKeywordsRule)
    ),
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        attr('right', Value)
    ),
    (
        attr('left', KeywordRule),
        omit(_, Literal(':'), _),
        attr('right', Query)
예제 #32
0
class NotQuery(UnaryRule):
    """Negation: ``Not`` (omitted) followed by an expression stored in ``op``."""

    grammar = (
        omit(Not),
        attr('op', Expression),
    )
예제 #33
0
class ParenthesizedQuery(UnaryRule):
    """Statement wrapped in parentheses, used to denote precedence.

    The inner statement is stored in ``op``.
    """

    grammar = (
        omit(Literal('(')),
        attr('op', Statement),
        omit(Literal(')')),
    )
예제 #34
0
class LowerEqualQuery(UnaryRule):
    """Less-than-or-equal clause in prefix (``<=`` value) or suffix (number ``-``) form.

    The operand is stored in ``op``.
    """

    grammar = [
        # Prefix form: "<=" then a SPIRES value.
        (
            omit(Literal('<='), _),
            attr('op', SpiresValue),
        ),
        # Suffix form: a number closed by "-", which must be followed by
        # whitespace, ")" or the end of input.
        (
            attr('op', Number),
            omit(re.compile(r'\-(?=\s|\)|$)')),
        ),
    ]
예제 #35
0
class SpiresParenthesizedQuery(UnaryRule):
    """A SPIRES query wrapped in parentheses; the inner query is stored in ``op``."""

    grammar = (omit(Literal('('), _), attr('op', SpiresQuery), omit(_, Literal(')')))
예제 #36
0
    grammar = re.compile(r'[^{]+')


class CommentStatement(LeafRule):
    """Leaf rule matching a ``{* ... *}`` comment, which may span several lines."""

    # DOTALL lets "." cross newlines; the non-greedy body stops at the first "*}".
    grammar = re.compile(r"{\*.*?\*}", re.DOTALL)


class LiteralStatement(LeafRule):
    """Leaf rule matching a ``{literal}...{/literal}`` section, which may span lines."""

    # DOTALL lets "." cross newlines; the non-greedy body stops at the first
    # closing tag.
    grammar = re.compile("{literal}.*?{/literal}", re.DOTALL)


class Whitespace(object):
    """Any number of space, newline or tab characters, including none."""

    grammar = maybe_some(
        [
            Literal(' '),
            Literal('\n'),
            Literal('\t'),
        ]
    )


# Shorthand grammar element: whitespace that is matched but left out of the result.
_ = omit(Whitespace)


class Identifier(LeafRule):
    """Leaf rule for identifiers.

    An identifier is any run of word characters and ``-``, ``+``, ``*``, ``/``
    that ends in a word character.
    """

    grammar = re.compile(
        r'[\w\-\+\*\/]*\w'
    )


"""
Logical Operators.
"""

class AndOperator(EmptyLeafRule):
    """Logical AND operator, written either as ``and`` or as ``&&``."""

    grammar = [
        Literal('and'),
        Literal('&&'),
    ]


class OrOperator(EmptyLeafRule):