Ejemplo n.º 1
0
# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation
any_name = Regex('[^\s"#%\'(),={}]+')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex("[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation
any_name = Regex('[^\s"#%\'(),={}]+')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs). Cite keys can start with a digit
not_digname = Regex('[^\d\s"#%\'(),={}][^\s"#%\'(),={}]*')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex("[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
macro_ref = not_dig_lower.copy().setParseAction(lambda t : Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word")
valid_word.setParseAction(
    lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\')
    )

string = QuotedString('"')

required_modifier = Literal("+")("required")
prohibit_modifier = Literal("-")("prohibit")
integer = Regex(r"\d+").setParseAction(lambda t:int(t[0]))
proximity_modifier = Group(TILDE + integer("proximity"))
number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0]))
fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy")

term = Forward()
field_name = valid_word.copy().setName("fieldname")
incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
boost = (CARAT + number("boost"))

string_expr = Group(string + proximity_modifier) | string
word_expr = Group(valid_word + fuzzy_modifier) | valid_word
term << (Optional(field_name("field") + COLON) + 
         (word_expr | string_expr | range_search | Group(LPAR + expression + RPAR)) +
         Optional(boost))
term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None)
    
expression << operatorPrecedence(term,
    [
    (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),