Example #1
0
    def __init__(self, parse_method=None, precedence=9):
        """Match a relational operator (<, <=, > or >=) followed by an integer.

        :param parse_method: optional parse action attached to the result
        :param precedence: precedence passed through to BaseType
        """
        comparison_op = (
            Literal("<") ^ Literal("<=") ^ Literal(">") ^ Literal(">=")
        )
        And.__init__(self, comparison_op + Integer())
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #2
0
    def __init__(self, parse_method=None, precedence=9):
        """Match a relational operator (<, <=, > or >=) followed by an integer.

        :param parse_method: optional parse action attached to the result
        :param precedence: precedence passed through to BaseType
        """
        relational = Literal('<') ^ Literal("<=") ^ Literal('>') ^ Literal(">=")
        And.__init__(self, [relational + Integer()])
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #3
0
    def load_symbols_file(self, file):
        """Parse one xkb symbols file and return its list of XkbSymbols sections.

        ``file`` is resolved relative to ``self.xkb_basedir``.  Results are
        memoized in ``self.loaded`` keyed by the resolved path, so each file
        is parsed at most once.

        Raises XkbLoader.XkbParserException when the file cannot be parsed.
        """
        file = self.xkb_basedir / file
        # Return the cached sections if this file was parsed before (EAFP).
        try:
            return self.loaded[file]
        except KeyError:
            pass

        sections = []

        # NOTE(review): the ``name`` argument is ignored -- every call returns
        # a fresh anonymous QuotedString.  Possibly the intent was
        # QuotedString(...)(name) to attach a results name; confirm before
        # changing.
        def quoted(name):
            return QuotedString(quoteChar='"', unquoteResults=True)

        # Callback, toks[0] is "foo" for xkb_symbols "foo"
        def new_symbols_section(name, loc, toks):
            assert len(toks) == 1
            sections.append(XkbSymbols(file, toks[0]))

        # Callback, toks[0] is "foo(bar)" for include "foo(bar)"
        # (appends to the section most recently opened by new_symbols_section)
        def append_includes(name, loc, toks):
            assert len(toks) == 1
            sections[-1].includes.append(toks[0])

        EOL = LineEnd().suppress()
        SECTIONTYPE = (
            "default",
            "partial",
            "hidden",
            "alphanumeric_keys",
            "modifier_keys",
            "keypad_keys",
            "function_keys",
            "alternate_group",
        )
        NAME = quoted("name").setParseAction(new_symbols_section)
        INCLUDE = (lit("include") +
                   quoted("include").setParseAction(append_includes) + EOL)
        # We only care about includes; any line that is neither the closing
        # "};" nor an include is matched here and discarded.
        OTHERLINE = And([~lit("};"), ~lit("include") + Regex(".*")]) + EOL

        with open(file) as fd:
            types = OneOrMore(oneOf(SECTIONTYPE)).suppress()
            include_or_other = Or([INCLUDE, OTHERLINE.suppress()])
            section = (types + lit("xkb_symbols") + NAME + lit("{") +
                       OneOrMore(include_or_other) + lit("};"))
            grammar = OneOrMore(section)
            # Strip C/C++-style comments before matching.
            grammar.ignore(cppStyleComment)
            try:
                # The parse result itself is unused; ``sections`` is filled in
                # by the parse-action callbacks above as a side effect.
                result = grammar.parseFile(fd)
            except ParseException as e:
                raise XkbLoader.XkbParserException(str(e))

        self.loaded[file] = sections

        return sections
Example #4
0
    def __init__(self,
                 range_parse_method=None,
                 item_parse_method=None,
                 range_symbol='..',
                 precedence=10):
        """Match an integer range: <int> <range_symbol> <int>.

        :param range_parse_method: optional parse action for the whole range
        :param item_parse_method: parse action applied to each Integer endpoint
        :param range_symbol: literal separating the two endpoints
        :param precedence: precedence passed through to BaseType
        """
        lower = Integer(item_parse_method)
        upper = Integer(item_parse_method)
        And.__init__(self, lower + Literal(range_symbol) + upper)
        BaseType.__init__(self, precedence)

        if range_parse_method:
            self.addParseAction(range_parse_method)
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
    """Convert a single ANTLR alternative into a pyparsing expression.

    Character-range atoms (c1..c2) become a Regex character class, nested
    blocks recurse via __antlrAlternativesConverter, and plain atoms are
    resolved through ``pyparsingRules``.  The suffix operators +, * and ?
    map to OneOrMore, ZeroOrMore and Optional respectively; multiple
    elements are combined with And.
    """
    elementList = []
    for element in antlrAlternative.elements:
        rule = None
        if hasattr(element.atom, "c1") and element.atom.c1 != "":
            # BUG FIX: the closing "]" was previously concatenated to c2[0]
            # *inside* str(...) -- str(element.atom.c2[0] + "]") -- which
            # raises TypeError whenever c2[0] is not already a string.  The
            # "]" belongs outside the conversion.
            regex = r"[" + str(element.atom.c1[0]) + "-" + str(element.atom.c2[0]) + "]"
            rule = Regex(regex)("anonymous_regex")
        elif hasattr(element, "block") and element.block != "":
            rule = __antlrAlternativesConverter(pyparsingRules, element.block)
        else:
            ruleRef = element.atom[0]
            assert ruleRef in pyparsingRules
            rule = pyparsingRules[ruleRef](ruleRef)
        if hasattr(element, "op") and element.op != "":
            if element.op == "+":
                rule = Group(OneOrMore(rule))("anonymous_one_or_more")
            elif element.op == "*":
                rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
            elif element.op == "?":
                rule = Optional(rule)
            else:
                raise Exception("rule operator not yet implemented : " + element.op)
        # (removed the no-op "rule = rule" statement)
        elementList.append(rule)
    if len(elementList) > 1:
        rule = Group(And(elementList))("anonymous_and")
    else:
        rule = elementList[0]
    assert rule is not None
    return rule
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
    """Convert a single ANTLR alternative into a pyparsing expression.

    Character-range atoms (c1..c2) become a Regex character class, nested
    blocks recurse via __antlrAlternativesConverter, and plain atoms are
    resolved through ``pyparsingRules``.  The suffix operators +, * and ?
    map to OneOrMore, ZeroOrMore and Optional respectively; multiple
    elements are combined with And.
    """
    elementList = []
    for element in antlrAlternative.elements:
        rule = None
        if hasattr(element.atom, 'c1') and element.atom.c1 != '':
            # BUG FIX: the closing ']' was previously concatenated to c2[0]
            # *inside* str(...) -- str(element.atom.c2[0] + ']') -- which
            # raises TypeError whenever c2[0] is not already a string.
            regex = r'[' + str(element.atom.c1[0]) + '-' + str(
                element.atom.c2[0]) + ']'
            rule = Regex(regex)("anonymous_regex")
        elif hasattr(element, 'block') and element.block != '':
            rule = __antlrAlternativesConverter(pyparsingRules, element.block)
        else:
            ruleRef = element.atom
            assert ruleRef in pyparsingRules
            rule = pyparsingRules[element.atom](element.atom)
        if hasattr(element, 'op') and element.op != '':
            if element.op == '+':
                rule = Group(OneOrMore(rule))("anonymous_one_or_more")
            elif element.op == '*':
                rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
            elif element.op == '?':
                rule = Optional(rule)
            else:
                raise Exception('rule operator not yet implemented : ' +
                                element.op)
        # (removed the no-op "rule = rule" statement)
        elementList.append(rule)
    if len(elementList) > 1:
        rule = Group(And(elementList))("anonymous_and")
    else:
        rule = elementList[0]
    # PEP 8: identity comparison with None, not "!= None"
    assert rule is not None
    return rule
Example #7
0
    def __init__(self):
        """Build the parsers for ds9 comment lines.

        parser_default handles bare attribute lists; parser_with_shape
        additionally accepts a leading shape (optionally followed by "||",
        which flags continuation via set_continued).
        """
        self.continued = False

        attr_parser = get_ds9_attr_parser()

        shape_defs = {
            "text": wcs_shape(CoordOdd, CoordEven),
            "vector": wcs_shape(CoordOdd, CoordEven, Distance, Angle),
            "composite": wcs_shape(CoordOdd, CoordEven, Angle),
            "projection": wcs_shape(CoordOdd, CoordEven,
                                    CoordOdd, CoordEven, Distance),
            "segment": wcs_shape(CoordOdd, CoordEven, repeat=(0, 2)),
        }
        shape_expr = define_shape_helper(shape_defs)
        # Wrap the raw tokens into a Shape(name, args) object.
        shape_expr = shape_expr.setParseAction(
            lambda s, l, tok: Shape(tok[0], tok[1:]))

        self.parser_default = attr_parser

        continuation = CaselessKeyword("||").setParseAction(
            self.set_continued).suppress()
        self.parser_with_shape = (
            Optional(And([shape_expr, Optional(continuation)])) + attr_parser
        )
Example #8
0
def nest(*content):
    """Define a delimited list by enumerating each element of the list."""
    if not content:
        raise ValueError('no arguments supplied')
    first, *rest = content
    # Interleave the comma element C between consecutive items.
    body = [first]
    for expr in rest:
        body.append(C)
        body.append(expr)
    return And([LPF] + body + [RPF])
Example #9
0
def build(parsers: dict):
    """Build the statement grammar.

    :param parsers: mapping from node-kind constants (STRING, MARKER,
        INVOCATION, FUNCTION, OPERATOR, LABEL) to parse-action callables
        that construct the corresponding semantic objects.
    :return: a parser matching zero or more labelled statements followed by
        end of input.
    """
    comma = Literal(",")
    rb = Literal(")")
    lb = Literal("(")
    srb = Literal("]")
    slb = Literal("[")
    # Non-negative integer without leading zeros.
    number = Regex(r"0|[1-9][0-9]*")
    string = quotedString()
    name = Word(alphanums)
    label = Keyword(STRONG) | Keyword(WEAK) | Literal(SHORT_WEAK)
    param = Combine(Keyword(PARAM) + slb + number + srb)
    marker = Keyword(RESULT) | Keyword(TRUE) | Keyword(FALSE) | Keyword(
        THIS) | Keyword(_THIS) | param
    function = Keyword(GET)
    get = Literal(GETATTR)
    # Operator tiers, tightest-binding first (applied in order below).
    operator1 = Literal(MUL) | Literal(DIV) | Literal(MOD)
    operator2 = Literal(ADD) | Literal(SUB)
    operator3 = Literal(EQUAL) | Literal(NOT_EQUAL)
    # "is not" is matched word by word so arbitrary whitespace may separate
    # the two keywords; plain "is" is the fallback alternative.
    operator3 |= And(Keyword(word) for word in IS_NOT.split(" ")) | Keyword(IS)
    operator4 = Literal(GREATER_OR_EQUAL) | Literal(GREATER) | Literal(
        LOWER_OR_EQUAL) | Literal(LOWER)
    operator5 = Keyword(AND)
    operator6 = Keyword(OR)
    operator7 = Keyword(FOLLOW)

    expression = Forward()
    string_st = string.setParseAction(parsers[STRING])
    name_st = name.setParseAction(parsers[STRING])
    marker_st = marker.setParseAction(parsers[MARKER])
    tuple_st = expression + ZeroOrMore(comma + expression)
    round_invocation_st = (lb + Optional(tuple_st) + rb).setParseAction(
        parsers[INVOCATION])
    function_st = (function + Suppress(round_invocation_st)).setParseAction(
        parsers[FUNCTION])
    getattr_st = (marker_st | name_st) + OneOrMore(
        (get + Suppress(name_st)).setParseAction(parsers[OPERATOR]))
    atom_st = (lb + expression +
               rb) | function_st | string_st | getattr_st | marker_st
    # Each rebinding of operator_st layers the next (looser) operator tier
    # on top of the previous one, building precedence into the grammar.
    operator_st = atom_st + ZeroOrMore(
        (operator1 + Suppress(atom_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator2 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator3 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator4 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator5 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator6 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator7 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    expression << operator_st

    # NOTE(review): enablePackrat is a class-level switch in pyparsing --
    # calling it on getattr_st enables packrat memoization globally, not just
    # for this expression.  Confirm that is the intent.
    getattr_st.enablePackrat()

    # A statement is an optional label (defaulting to STRONG) plus an
    # expression; results are built by the LABEL parse action.
    statement = (Optional(label, STRONG) +
                 Suppress(expression)).setParseAction(parsers[LABEL])
    return ZeroOrMore(statement) + StringEnd()
Example #10
0
def get_fragment_language() -> ParserElement:
    """Build a protein fragment parser."""
    value = fragment_range | missing_fragment(FRAGMENT_MISSING)
    # The value may optionally be wrapped in (suppressed) double quotes.
    quoted_or_bare = value | And([Suppress('"'), value, Suppress('"')])
    return fragment_tag + nest(
        quoted_or_bare + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))
Example #11
0
def define_shape(name, shape_args, args_repeat=None):
    """Build a parser for shape ``name`` with the given argument parsers.

    Two spellings are accepted: ``name(arg1,arg2,...)`` with parentheses and
    commas, or ``name arg1 arg2 ...`` without.

    :param name: shape keyword, matched case-insensitively
    :param shape_args: sequence of parser elements, one per argument
    :param args_repeat: optional (n1, n2) index pair; the arguments in
        ``shape_args[n1:n2]`` then form a group that appears at least once
        and may repeat (comma-separated) any number of times in the
        parenthesised spelling
    :return: parser matching either spelling
    """
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    comma = Literal(",").suppress()

    shape_name = CaselessKeyword(name)

    if args_repeat is None:
        shape_with_parens = And(
            [shape_name, lparen,
             as_comma_separated_list(shape_args), rparen])

        shape_with_spaces = shape_name + And(shape_args)

    else:
        n1, n2 = args_repeat
        # Build the parenthesised argument list piecewise: fixed head
        # args, repeatable middle group, fixed tail args.
        sl = []

        ss = shape_args[:n1]
        if ss:
            sl.append(as_comma_separated_list(ss))

        ss = shape_args[n1:n2]
        if ss:
            ar = as_comma_separated_list(ss)
            # A leading comma is only needed when head args precede the
            # repeatable group.
            if sl:
                sl.extend([comma + ar, ZeroOrMore(comma + ar)])
            else:
                sl.extend([ar, ZeroOrMore(comma + ar)])

        ss = shape_args[n2:]
        if ss:
            if sl:
                sl.extend([comma, as_comma_separated_list(ss)])
            else:
                sl.extend([as_comma_separated_list(ss)])

        sl = [shape_name, lparen] + sl + [rparen]

        shape_with_parens = And(sl)

        # NOTE(review): the space-separated spelling repeats the *entire*
        # argument list here, not just shape_args[n1:n2] -- confirm this
        # asymmetry with the parenthesised form is intended.
        shape_with_spaces = shape_name + OneOrMore(And(shape_args))

    return (shape_with_parens | shape_with_spaces)
Example #12
0
def as_comma_separated_list(al):
    """Return an And matching the given parser elements separated by
    (suppressed) commas."""
    comma = Literal(",").suppress()

    interleaved = [al[0]]
    for arg in al[1:]:
        interleaved.extend([comma, arg])

    return And(interleaved)
Example #13
0
def get_fragment_language():
    """Assemble the protein fragment grammar: fragment tag followed by a
    nested value with an optional description."""
    inner = fragment_range | missing_fragment(FRAGMENT_MISSING)

    # The value may optionally be wrapped in (suppressed) double quotes.
    value = inner | And([Suppress('"'), inner, Suppress('"')])

    return fragment_tag + nest(
        value + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))
Example #14
0
    def _parse_ascii_data(self, header_tokens, file_mmap, buffer_offset,
                          aggregate_data_types):
        """
        Parse the data portion of a PLY file assuming it uses ASCII format.

        Builds one grammar group per element declared in the header (list
        properties become counted arrays), tokenizes the remainder of the
        mapped file, and packs each element's values into a flat
        ``array.array`` of the requested type code.

        :param header_tokens: parsed header (provides ``elements``)
        :param file_mmap: memory-mapped file contents
        :param buffer_offset: byte offset where the body starts
        :param aggregate_data_types: {element name: array type code}
        :return: {element name: array.array of flattened values}
        """
        # Grammar of the body: one row per element instance.
        number = pyparsing_common.number()
        body_expr = []
        for element in header_tokens.elements:
            sequences = []
            for prop in element.properties:
                for variable in prop:
                    if "index_type" in variable:
                        # List property: a count followed by that many values.
                        sequences.append(countedArray(number))
                    else:
                        sequences.append(number(variable.name))

            row = Group(And(sequences))
            body_expr.append(Group(row * element.count)(element.name))

        # Tokenize the body data.
        body_tokens = And(body_expr).parseString(
            file_mmap[buffer_offset:].decode("ascii"), parseAll=True)

        # Convert the token groups to flat typed arrays.
        return {
            name: array.array(dtype, self._flatten(body_tokens[name]))
            for name, dtype in aggregate_data_types.items()
        }
Example #15
0
def make_date_parser():
    """Build a parser for date-interval expressions.

    Three alternatives are recognised, each consuming the whole input and
    carrying the results name 'interval':

    * ``LAST <n>``                    -> handle_last
    * ``PREVIOUS DAY|WEEK|MONTH``     -> handle_previous
    * ``FROM <iso-date> TO <iso-date>`` -> handle_fromto
    """

    # ISO-8601 date converted to a datetime.date by the parse action.
    date_expr = ppc.iso8601_date.copy()
    date_expr.setParseAction(ppc.convertToDate())

    # NOTE(review): '&' builds a pyparsing Each (order-independent match),
    # so the parts of these first two expressions may match in any order;
    # confirm that was intended rather than the strictly sequential '+'.
    expr_last = And(
        CaselessKeyword('LAST') & ppc.integer.setResultsName('n')
        & StringEnd()).setResultsName('interval').setParseAction(handle_last)

    expr_prev = And(
        CaselessKeyword('PREVIOUS') & Or(
            CaselessKeyword('DAY').setResultsName('day')
            | CaselessKeyword('WEEK').setResultsName('week')
            | CaselessKeyword('MONTH').setResultsName('month')) +
        StringEnd()).setResultsName('interval').setParseAction(handle_previous)

    # FROM <start> TO <end>: strict left-to-right sequence.
    expr_fromto_date = And(
        CaselessKeyword('FROM') + date_expr.setResultsName('start') +
        CaselessKeyword('TO') + date_expr.setResultsName('end') +
        StringEnd()).setResultsName('interval').setParseAction(handle_fromto)

    # MatchFirst: first alternative that succeeds wins.
    parser = expr_fromto_date | expr_last | expr_prev

    return parser
Example #16
0
    def __init__(self):
        """Set up the ds9 region-line parser.

        A line consists of an optional atom (shape expression, coordinate
        command, or 'global' settings), possibly in composite ("||") form,
        plus an optional comment; parsed atoms and comments are pushed onto
        this RegionPusher via pushAtom / pushComment.
        """

        RegionPusher.__init__(self)

        self.shape_definition = ds9_shape_defs
        regionShape = define_shape_helper(self.shape_definition)
        # Wrap the raw tokens into a Shape(name, args) object.
        regionShape = regionShape.setParseAction(
            lambda s, l, tok: Shape(tok[0], tok[1:]))

        regionExpr = define_expr(
            regionShape,
            # Negation marks the just-parsed shape as excluded.
            negate_func=lambda s, l, tok: tok[-1].set_exclude(),
        )

        # Recognised coordinate-system keywords.
        coord_command_keys = [
            'PHYSICAL', 'IMAGE', 'FK4', 'B1950', 'FK5', 'J2000', 'GALACTIC',
            'ECLIPTIC', 'ICRS', 'LINEAR', 'AMPLIFIER', 'DETECTOR'
        ]

        coordCommandLiterals = define_simple_literals(coord_command_keys)
        # "WCS" optionally followed by a run of letters, combined into one
        # token.
        coordCommandWCS = Combine(
            CaselessLiteral("WCS") + Optional(Word(alphas)))

        coordCommand = (coordCommandLiterals | coordCommandWCS)
        coordCommand.setParseAction(lambda s, l, tok: CoordCommand(tok[-1]))

        regionGlobal = comment_shell_like(CaselessKeyword("global"),
                                          lambda s, l, tok: Global(tok[-1]))

        regionAtom = (regionExpr | coordCommand | regionGlobal)

        # Every successfully parsed atom is recorded on the pusher.
        regionAtom = regionAtom.setParseAction(self.pushAtom)

        regionComment = comment_shell_like(Literal("#"),
                                           parseAction=self.pushComment)

        line_simple = define_line(atom=regionAtom,
                                  separator=Literal(";"),
                                  comment=regionComment)

        # An atom followed by "||": the set_continued parse action flags a
        # composite region continued on the next line.
        line_w_composite = And([regionAtom,
                                CaselessKeyword("||").setParseAction(self.set_continued)
                                ]) \
                           + Optional(regionComment)

        line = Or([line_simple, line_w_composite])

        # A line may be empty; input must be fully consumed.
        self.parser = Optional(line) + StringEnd()
Example #17
0
    def __init__(self):
        """Define the fragment grammar and hand it to the base parser."""
        # start_stop pair like "5_20", where either side may be '?', and the
        # stop may also be '*'.
        self.fragment_range = (ppc.integer | '?')(FRAGMENT_START) + '_' + (
            ppc.integer | '?' | '*')(FRAGMENT_STOP)
        self.missing_fragment = Keyword('?')(FRAGMENT_MISSING)

        inner = self.fragment_range | self.missing_fragment(FRAGMENT_MISSING)
        self._fragment_value_inner = inner
        # The value may optionally be wrapped in (suppressed) double quotes.
        self._fragment_value = inner | And(
            [Suppress('"'), inner, Suppress('"')])

        self.language = fragment_tag + nest(
            self._fragment_value + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))

        super(FragmentParser, self).__init__(self.language)
Example #18
0
    def __init__(self):
        """Build the parsers for ds9 comment lines: attributes alone, or an
        optional shape (with optional "||" continuation) plus attributes."""
        self.continued = False

        attr_parser = get_ds9_attr_parser()

        shape = define_shape_helper(ds9_shape_in_comment_defs)
        # Wrap the raw tokens into a Shape(name, args) object.
        shape = shape.setParseAction(lambda s, l, tok: Shape(tok[0], tok[1:]))

        self.parser_default = attr_parser

        continuation = CaselessKeyword("||").setParseAction(
            self.set_continued).suppress()
        self.parser_with_shape = (
            Optional(And([shape, Optional(continuation)])) + attr_parser
        )
Example #19
0
def parse_value_comparison(op_string):
    """Parse a bare number or a comparison against a number.

    Accepted forms (Ex): 10, <=10, <10, >=10, >10

    :return: the parsed tokens as a list
    :raises ValueError: when op_string matches none of the forms
    """
    num = Word(nums + ".")  # matches 1 or 1.1
    ltgt = oneOf("< <= > >=")
    grammar = num | And([ltgt, num])

    try:
        result = grammar.parseString(op_string)
    except ParseException:
        #:TODO what should really happen here?
        raise ValueError("'%s' is not a Valid operation" % op_string)
    return result.asList()
Example #20
0
    def _aggregate_property(cls, name, prefix, *keywords):
        """
        Create a property group from the specified name,
        the pattern prefix (a ParseElement instance), and an iterable of keywords.

        Example:
        aggregate_property('position', CaselessKeyword('property'), *[CaselessKeyword('x'), ...])

        :param name: results name for the whole group
        :param prefix: parser element preceding each keyword
        :param keywords: parser elements, each tagged with results name "name"
        :return: grouped parser matching prefix+keyword once per keyword
        """
        members = [Group(prefix + keyword("name")) for keyword in keywords]
        return Group(And(members))(name)
Example #21
0
    def __init__(self):
        """Set up the ds9 region-line parser.

        A line consists of an optional atom (shape expression, coordinate
        command, or 'global' settings), possibly in composite ("||") form,
        plus an optional comment; parsed atoms and comments are pushed onto
        this RegionPusher via pushAtom / pushComment.
        """

        RegionPusher.__init__(self)

        self.shape_definition = ds9_shape_defs
        regionShape = define_shape_helper(self.shape_definition)
        # Wrap the raw tokens into a Shape(name, args) object.
        regionShape = regionShape.setParseAction(
            lambda s, l, tok: Shape(tok[0], tok[1:]))

        regionExpr = define_expr(
            regionShape,
            # Negation marks the just-parsed shape as excluded.
            negate_func=lambda s, l, tok: tok[-1].set_exclude(),
        )

        # Recognised coordinate-system keywords.
        coord_command_keys = "PHYSICAL IMAGE FK4 B1950 FK5 J2000 GALACTIC ECLIPTIC ICRS LINEAR AMPLIFIER DETECTOR".split(
        )

        coordCommand = define_simple_literals(
            coord_command_keys,
            parseAction=lambda s, l, tok: CoordCommand(tok[-1]))

        regionGlobal = comment_shell_like(CaselessKeyword("global"),
                                          lambda s, l, tok: Global(tok[-1]))

        regionAtom = (regionExpr | coordCommand | regionGlobal)

        # Every successfully parsed atom is recorded on the pusher.
        regionAtom = regionAtom.setParseAction(self.pushAtom)

        regionComment = comment_shell_like(Literal("#"),
                                           parseAction=self.pushComment)

        line_simple = define_line(atom=regionAtom,
                                  separator=Literal(";"),
                                  comment=regionComment)

        # An atom followed by "||": the set_continued parse action flags a
        # composite region continued on the next line.
        line_w_composite = And([regionAtom,
                                CaselessKeyword("||").setParseAction(self.set_continued)
                                ]) \
                           + Optional(regionComment)

        line = Or([line_simple, line_w_composite])

        # A line may be empty; input must be fully consumed.
        self.parser = Optional(line) + StringEnd()
Example #22
0
    def __init__(
        self,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        citation_clearing: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Initialize the control statement parser.

        :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
        :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
        :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
        :param required_annotations: Annotations that are required
        """
        self.citation_clearing = citation_clearing

        self.annotation_to_term = annotation_to_term or {}
        self.annotation_to_pattern = annotation_to_pattern or {}
        self.annotation_to_local = annotation_to_local or {}

        # Parser state mutated by the handle_* parse actions while a
        # document is processed.
        self.statement_group = None
        self.citation_db = None
        self.citation_db_id = None
        self.evidence = None
        self.annotations = {}
        self.required_annotations = required_annotations or []

        # An annotation keyword, validated by handle_annotation_key.
        annotation_key = ppc.identifier('key').setParseAction(
            self.handle_annotation_key)

        # --- SET statements -------------------------------------------
        self.set_statement_group = set_statement_group_stub().setParseAction(
            self.handle_set_statement_group)
        self.set_citation = set_citation_stub.setParseAction(
            self.handle_set_citation)
        self.set_evidence = set_evidence_stub.setParseAction(
            self.handle_set_evidence)

        # SET <key> = <value>  /  SET <key> = {<values>, ...}
        set_command_prefix = And([annotation_key('key'), Suppress('=')])
        self.set_command = set_command_prefix + qid('value')
        self.set_command.setParseAction(self.handle_set_command)

        self.set_command_list = set_command_prefix + delimited_quoted_list(
            'values')
        self.set_command_list.setParseAction(self.handle_set_command_list)

        # --- UNSET statements -----------------------------------------
        self.unset_command = annotation_key('key')
        self.unset_command.addParseAction(self.handle_unset_command)

        self.unset_evidence = supporting_text_tags(EVIDENCE)
        self.unset_evidence.setParseAction(self.handle_unset_evidence)

        self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
        self.unset_citation.setParseAction(self.handle_unset_citation)

        self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
        self.unset_statement_group.setParseAction(
            self.handle_unset_statement_group)

        self.unset_list = delimited_unquoted_list('values')
        self.unset_list.setParseAction(self.handle_unset_list)

        self.unset_all = unset_all.setParseAction(self.handle_unset_all)

        # MatchFirst: order matters -- more specific alternatives first.
        self.set_statements = set_tag + MatchFirst([
            self.set_statement_group,
            self.set_citation,
            self.set_evidence,
            self.set_command,
            self.set_command_list,
        ])

        self.unset_statements = unset_tag + MatchFirst([
            self.unset_all,
            self.unset_citation,
            self.unset_evidence,
            self.unset_statement_group,
            self.unset_command,
            self.unset_list,
        ])

        self.language = self.set_statements | self.unset_statements

        super(ControlParser, self).__init__(self.language)
Example #23
0
    def __init__(
        self,
        manager,
        namespace_to_term_to_encoding: Optional[
            NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_redefinition: bool = False,
        skip_validation: bool = False,
        upgrade_urls: bool = False,
    ) -> None:
        """Build a metadata parser.

        :param manager: A cache manager
        :param namespace_to_term_to_encoding:
          An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}}
        :param namespace_to_pattern:
          A regular expression namespace mapping from {namespace keyword: regex string}
        :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values}
        :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string}
        :param default_namespace: A set of strings that can be used without a namespace
        :param skip_validation: If true, don't download and cache namespaces/annotations
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        # allow_redefinition is stored inverted; checked by redefinition guards.
        self.disallow_redefinition = not allow_redefinition
        self.skip_validation = skip_validation
        self.upgrade_urls = upgrade_urls

        #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
        self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
        #: A set of namespaces's URLs that can't be cached
        self.uncachable_namespaces = set()
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_to_pattern = namespace_to_pattern or {}
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None

        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_term = annotation_to_term or {}
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_to_pattern = annotation_to_pattern or {}
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_local = annotation_to_local or {}

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}

        # SET DOCUMENT <key> = <value>
        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ])

        # DEFINE NAMESPACE <name> AS ...
        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # DEFINE ANNOTATION <name> AS URL/LIST/PATTERN ...
        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # Attach the handler for each statement form.
        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        # MatchFirst: the first alternative that parses wins.
        self.language = MatchFirst([
            self.document,
            self.namespace_url,
            self.annotation_url,
            self.annotation_list,
            self.annotation_pattern,
            self.namespace_pattern,
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)
Example #24
0
    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph

        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)
Example #25
0
    EVIDENCE,
)
from ..utils import citation_dict

__all__ = ['ControlParser']

logger = logging.getLogger(__name__)

# Leading keywords of BEL control statements; suppressed so that they do
# not show up in the parse results.
set_tag = Suppress(BEL_KEYWORD_SET)
unset_tag = Suppress(BEL_KEYWORD_UNSET)
unset_all = Suppress(BEL_KEYWORD_ALL)

# Supporting text may be introduced by either keyword (Evidence / SupportingText).
supporting_text_tags = oneOf([BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT])

# ``SET STATEMENT_GROUP = ...`` -- the group name is captured under 'group'.
set_statement_group_stub = (
    Suppress(BEL_KEYWORD_STATEMENT_GROUP) + Suppress('=') + qid('group')
)

# ``SET Citation = ...`` -- the quoted list is captured under 'values'.
set_citation_stub = (
    Suppress(BEL_KEYWORD_CITATION) + Suppress('=') +
    delimited_quoted_list('values')
)

# ``SET Evidence = "..."`` -- the quoted text is captured under 'value'.
set_evidence_stub = (
    Suppress(supporting_text_tags) + Suppress('=') + quote('value')
)


class ControlParser(BaseParser):
    """A parser for BEL control statements.
Example #26
0
def create_grammar(container_ids, secret_ids):
    """Create the pyparsing grammar for the editfile.

    :param container_ids: collection of valid container ids (integers)
    :param secret_ids: collection of valid secret ids (integers)
    :returns: a pyparsing grammar whose parse result is a list of
        ``[container_id_or_None, entries]`` pairs
    """
    # NOTE: ``ParseException`` was missing from this import list, so the
    # ``raise ParseException(...)`` calls below raised NameError instead of
    # reporting "Not a valid ... id".  Unused ``alphas``/``Combine`` dropped.
    from pyparsing import (nums, lineEnd, stringEnd,
                            OneOrMore, ZeroOrMore, SkipTo, Optional, And,
                            Word, CharsNotIn, Empty, QuotedString, Literal,
                            Suppress, Group, originalTextFor, Forward,
                            ParseException, ParserElement)
    # Read from bottom to top
    whiteSpaceChars = ' \t'
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)
    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
                        # NOTE the second replace is a work-around for
                        #      pyparsing bug #68.
                        #       https://sourceforge.net/p/pyparsing/bugs/68/
                        lambda s, l, t: t[0].replace("\\n", "\n").replace(
                                                     "\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        # Convert "#<n>" references to ints and validate against known ids.
        v = int(tokens[0])
        if v not in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v
    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        v = int(tokens[0])
        if v not in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v
    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    # A literal secret may not start with '#': that prefix marks an id reference.
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    # ``-`` (And with error stop) makes errors inside an entry fatal rather
    # than silently backtracking to another alternative.
    entry = (~containerKeyword + Group(key - secret - Optional(note))
                - Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment
    # Instead of the following recursive grammar, we could have simply used
    #
    #     containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                                                + Group(OneOrMore(line)))
    #     multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd | comment + _multipleContainers_head
                    | containerLine + _multipleContainers_body)
    _multipleContainers_body << (stringEnd
                    | (containerLine | line) + _multipleContainers_body)
    _multipleContainers_entry = And([entry])
    multipleContainers = And([_multipleContainers_head]) # TODO ibidem below
    # Tag container header lines with a ``[None, id]`` marker so the parse
    # action below can tell headers apart from entries.
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        # Fold the flat token stream into [container_id, entries] pairs.
        # The (None, None) sentinel flushes the final container.
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None),):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret
    multipleContainers.setParseAction(multipleContainersParseAction)

    # Editfile without any CONTAINER header: one implicit container.
    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])
    grammar = multipleContainers | oneContainer
    return grammar
Example #27
0
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""
    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph

        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations becuase it gets distrinbuted
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Return the {namespace: {name: encoding}} mapping held by the internal identifier parser."""
        terms = self.identifier_parser.namespace_to_terms
        return terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return whether naked (namespace-less) names are parsed (``True``) instead of raising errors (``False``)."""
        lenient = self.identifier_parser.allow_naked_names
        return lenient

    def get_annotations(self) -> Dict:
        """Return the annotations currently active in the delegate control parser."""
        current = self.control_parser.get_annotations()
        return current

    def clear(self):
        """Reset the graph and all control parser state (current citation, annotations, and statement group)."""
        # Graph first, then parser state — mirrors the construction order.
        for component in (self.graph, self.control_parser):
            component.clear()

    def handle_nested_relation(self, line: str, position: int,
                               tokens: ParseResults):
        """Handle nested statements.

        If :code:`allow_nested` is False, raises a ``NestedRelationWarning``.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        # Un-nest ``A rel (B rel C)`` into two flat relations. First the outer
        # one, rewritten so its object is the *subject* of the inner statement:
        # ``A rel B`` ...
        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            })

        # ... then the inner relation itself: ``B rel C``.
        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            })
        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidEntity
        :raises: InvalidFunctionSemantic
        """
        # Nothing to validate without loaded namespaces, or for tokens that
        # carry no namespace at all.
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Regex-defined namespaces have no per-name encodings to check against.
        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and tokens[
                NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        # Union of all BEL functions permitted by each encoding character
        # associated with this name in its namespace.
        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings.get(k, set())
                for k in self._namespace_dict[namespace][name]))

        # No recognized encodings at all means the entity itself is invalid.
        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.get_line_number(), line,
                                          position, tokens[FUNCTION],
                                          namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle a standalone BEL term (a subject/object of a relation) by ensuring its node is in the graph."""
        self.ensure_node(tokens)

        return tokens

    def _handle_list_helper(self, tokens: ParseResults,
                            relation: str) -> ParseResults:
        """Add unqualified edges from the parent term (token 0) to each child term (inside token 2).

        Shared implementation behind :meth:`handle_has_members` and
        :meth:`handle_has_components`.
        """
        parent = self.ensure_node(tokens[0])

        for child in tokens[2]:
            self.graph.add_unqualified_edge(parent, self.ensure_node(child),
                                            relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations of the form ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, relation=HAS_MEMBER)

    def handle_has_components(self, _, __,
                              tokens: ParseResults) -> ParseResults:
        """Handle list relations of the form ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, relation=HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier) -> str:
        """Insert a single qualified edge, drawing evidence/citation/line from the current parser state."""
        edge_data = dict(
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )
        # LINE is a constant key, so it has to be spliced in dynamically.
        edge_data[LINE] = self.get_line_number()
        return self.graph.add_qualified_edge(u, v, **edge_data)

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier) -> str:
        """Insert a qualified edge and, for symmetric relations, its mirror edge as well.

        Returns the key of the forward edge only.
        """
        forward_key = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        # Symmetric (two-way) relations are stored in both directions, with
        # the subject/object modifiers swapped along with the endpoints.
        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                subject_modifier=object_modifier,
                object_modifier=subject_modifier,
            )

        return forward_key

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation: ensure both endpoint nodes exist, then add a qualified edge.

        :return: the value returned by ``add_qualified_edge`` for the forward edge
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        # Extract activity/location/etc. modifiers from each side's tokens.
        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        # Normalize every current annotation to a {value: True} dict;
        # set-valued annotations expand to one entry per member.
        annotations = {
            annotation_name: ({ae: True
                               for ae in annotation_entry} if isinstance(
                                   annotation_entry, set) else {
                                       annotation_entry: True
                                   })
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Validate the required statement context (citation, evidence, annotations), then handle the relation.

        Note: this can't be changed after instantiation!

        :raises: MissingCitationException
        :raises: MissingSupportWarning
        :raises: MissingAnnotationWarning
        """
        # Check order matters: citation, then evidence, then required annotations.
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing = self.control_parser.get_missing_required_annotations()
        if missing:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position, missing)

        self._handle_relation(tokens)

        return tokens

    def handle_unqualified_relation(self, _, __,
                                    tokens: ParseResults) -> ParseResults:
        """Handle relations that carry no citation/evidence context (e.g. hasMember)."""
        source = self.ensure_node(tokens[SUBJECT])
        target = self.ensure_node(tokens[OBJECT])
        self.graph.add_unqualified_edge(source, target, tokens[RELATION])
        return tokens

    def handle_label_relation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) label "Label for X"``.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            # Fix: report the node actually being relabeled. The old code
            # passed ``self.graph.node`` (the graph-wide node view), which
            # made the warning's ``node`` field useless.
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                node=subject_node_dsl,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Convert parsed tokens into a canonical DSL node and guarantee its presence in the graph."""
        # Modified terms wrap the actual entity under the TARGET key; recurse
        # until the bare entity is reached.
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node_dsl = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node_dsl)
        return node_dsl

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Reject a translocation that doesn't match any recognized form."""
        error = MalformedTranslocationWarning(self.get_line_number(), line,
                                              position, tokens)
        raise error
Example #28
0
    def __init__(self, parse_method=None, field_separator=":", precedence=11):
        """Grammar element matching a field prefix: a field name followed by ``field_separator``.

        :param parse_method: optional callable attached as a parse action
        :param field_separator: literal terminating the field name (default ``":"``)
        :param precedence: precedence value forwarded to ``BaseType``
        """
        # NOTE(review): a sibling definition in this file wraps the expression
        # in a list (``And.__init__(self, [...])``) — confirm which form the
        # ``And`` base class expects.
        And.__init__(self, FieldName() + Literal(field_separator))
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #29
0
    def __init__(self, range_parse_method=None, item_parse_method=None, range_symbol="..", precedence=10):
        """Grammar element matching an integer range: ``<int><range_symbol><int>`` (e.g. ``1..5``).

        :param range_parse_method: optional parse action attached to the whole range expression
        :param item_parse_method: parse action applied to each endpoint ``Integer``
        :param range_symbol: literal separating the endpoints (default ``".."``)
        :param precedence: precedence value forwarded to ``BaseType``
        """
        And.__init__(self, Integer(item_parse_method) + Literal(range_symbol) + Integer(item_parse_method))
        BaseType.__init__(self, precedence)

        if range_parse_method:
            self.addParseAction(range_parse_method)
Example #30
0
    def __init__(self, parse_method=None, precedence=11):
        """Grammar element matching ``<quoted string>~<integer>`` (a proximity-style expression).

        :param parse_method: optional callable attached as a parse action
        :param precedence: precedence value forwarded to ``BaseType``
        """
        And.__init__(self, [QuotedString() + Literal('~') + Integer()])
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #31
0
    def __init__(self, parse_method=None, field_separator=':', precedence=11):
        """Grammar element matching a field prefix: a field name followed by ``field_separator``.

        :param parse_method: optional callable attached as a parse action
        :param field_separator: literal terminating the field name (default ``':'``)
        :param precedence: precedence value forwarded to ``BaseType``
        """
        And.__init__(self, [FieldName() + Literal(field_separator)])
        BaseType.__init__(self, precedence)

        if parse_method:
            self.addParseAction(parse_method)
Example #32
0
    else:
        message += crlf
    message.leaveWhitespace()


build_message()
# Rebuild the message grammar whenever either config value changes.
dispatcher.connect(build_message, 'parser.trailing_spaces', 'config')
dispatcher.connect(build_message, 'parser.soft_eol', 'config')

# Channel-name characters: any octet except NUL, BELL, LF, CR, space, comma
# and colon (the byte ranges below exclude exactly 0x00, 0x07, 0x0A, 0x0D,
# 0x20, 0x2C, 0x3A) — presumably the IRC (RFC 2812) channel grammar.
chanstring = charclass((0x01, 0x06), (0x08, 0x09), (0x0B, 0x0C), (0x0E, 0x1F),
                       (0x21, 0x2B), (0x2D, 0x39), (0x3B, 0xFF))
# Exactly five characters, each an uppercase ASCII letter (0x41-0x5A) or digit.
channelid = 5 * (charclass((0x41, 0x5A)) ^ digit)

# A channel: a prefix ('#', '+', '&', or '!' followed by a channelid), the
# name itself, and an optional ':'-separated suffix.
channel = And([
    Or([oneOf('# + &'), Literal('!') + Group(channelid)]),
    Group(OneOrMore(chanstring)),
    Optional(Suppress(Literal(':')) + Group(OneOrMore(chanstring)))
])

###
# Wildcard expressions
###
wildone = Literal('?')
wildmany = Literal('*')
# Any octet except NUL, '*' (0x2A), '?' (0x3F)
nowild = charclass((0x01, 0x29), (0x2B, 0x3E), (0x40, 0xFF))
# Any octet except NUL and backslash (0x5C)
noesc = charclass((0x01, 0x5B), (0x5D, 0xFF))
# A mask: optional leading wildcard, then non-wild chars or escaped wildcards.
mask = Optional(wildone ^ wildmany) + \
    ZeroOrMore(nowild ^ (noesc + wildone) ^ (noesc + wildmany))

# Fall back to regex for parsing wildcards
matchone = '[%s-%s]' % (chr(0x01), chr(0xFF))
Example #33
0
    def __init__(self,
                 manager,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 default_namespace=None,
                 allow_redefinition=False):
        """
        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
                                {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
                                {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
                                {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
                                {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        :param bool allow_redefinition: If True, namespaces and annotations may be redefined without error
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager

        self.disallow_redefinition = not allow_redefinition

        #: A dictionary of cached {namespace keyword: {name: encoding}}
        self.namespace_dict = {} if namespace_dict is None else namespace_dict
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_dict = {} if annotation_dict is None else annotation_dict
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_regex = {} if namespace_regex is None else namespace_regex
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_regex = {} if annotation_regex is None else annotation_regex

        #: A set of namespaces' URLs that can't be cached
        self.uncachable_namespaces = set()

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: A dictionary from {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: A set of annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        # Grammar: ``SET DOCUMENT <key> = "<value>"``
        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value')
        ])

        # Shared prefix ``DEFINE NAMESPACE <name> AS`` for the namespace productions
        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_owl = And([
            namespace_tag, owl_tag,
            Optional(function_tags('functions')),
            quote('url')
        ])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # Shared prefix ``DEFINE ANNOTATION <name> AS`` for the annotation productions
        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_owl = And([annotation_tag, owl_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # Wire each production to its handler method.
        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_owl.setParseAction(self.handle_namespace_owl)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_owl.setParseAction(self.handle_annotation_owl)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        # Any single metadata statement.
        self.language = MatchFirst([
            self.document, self.namespace_url, self.namespace_owl,
            self.annotation_url, self.annotation_list, self.annotation_owl,
            self.annotation_pattern, self.namespace_pattern
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)
Example #34
0
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso::

        BEL 1.0 Specification for the `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_ keyword
    """
    def __init__(self,
                 manager,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 default_namespace=None,
                 allow_redefinition=False):
        """
        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
                                {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
                                {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
                                {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
                                {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        :param bool allow_redefinition: If True, namespaces and annotations may be redefined without error
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager

        self.disallow_redefinition = not allow_redefinition

        #: A dictionary of cached {namespace keyword: {name: encoding}}
        self.namespace_dict = {} if namespace_dict is None else namespace_dict
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_dict = {} if annotation_dict is None else annotation_dict
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_regex = {} if namespace_regex is None else namespace_regex
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_regex = {} if annotation_regex is None else annotation_regex

        #: A set of namespaces' URLs that can't be cached
        self.uncachable_namespaces = set()

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: A dictionary from {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: A set of annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        # Grammar: ``SET DOCUMENT <key> = "<value>"``
        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value')
        ])

        # Shared prefix ``DEFINE NAMESPACE <name> AS`` for the namespace productions
        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_owl = And([
            namespace_tag, owl_tag,
            Optional(function_tags('functions')),
            quote('url')
        ])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # Shared prefix ``DEFINE ANNOTATION <name> AS`` for the annotation productions
        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_owl = And([annotation_tag, owl_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        # Wire each production to its handler method.
        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_owl.setParseAction(self.handle_namespace_owl)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_owl.setParseAction(self.handle_annotation_owl)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        # Any single metadata statement.
        self.language = MatchFirst([
            self.document, self.namespace_url, self.namespace_owl,
            self.annotation_url, self.annotation_list, self.annotation_owl,
            self.annotation_pattern, self.namespace_pattern
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line, position, tokens):
        """Handles statements like ``SET DOCUMENT X = "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidMetadataException
        """
        key = tokens['key']
        value = tokens['value']

        # Only keys from the fixed DOCUMENT_KEYS vocabulary are accepted.
        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.line_number, line, position,
                                           key, value)

        norm_key = DOCUMENT_KEYS[key]

        # First definition wins: repeated SETs only log a warning.
        if norm_key in self.document_metadata:
            log.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        # The version value gets extra validation.
        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line, position, namespace):
        """Raises an exception if a namespace is already defined

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str namespace: The namespace being parsed
        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.line_number, line, position,
                                          namespace)

    def handle_namespace_url(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS URL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        url = tokens['url']

        namespace_result = self.manager.ensure_namespace(url)

        # A plain dict result signals the manager couldn't build a cached
        # model for this resource, so remember its URL as uncachable.
        if isinstance(namespace_result, dict):
            self.namespace_dict[namespace] = namespace_result
            self.uncachable_namespaces.add(url)
        else:
            self.namespace_dict[namespace] = namespace_result.to_values()

        self.namespace_url_dict[namespace] = url

        return tokens

    def handle_namespace_owl(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS OWL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        # The optional function list defaults to the full encoding string.
        functions = str(tokens['functions']
                        ) if 'functions' in tokens else BELNS_ENCODING_STR

        url = tokens['url']

        terms = self.manager.get_namespace_owl_terms(url, namespace)

        # Every term from the ontology gets the same encoding.
        self.namespace_dict[namespace] = {term: functions for term in terms}
        self.namespace_owl_dict[namespace] = url

        return tokens

    def handle_namespace_pattern(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        self.namespace_regex[namespace] = tokens['value']

        return tokens

    def raise_for_redefined_annotation(self, line, position, annotation):
        """Raises an exception if the given annotation is already defined

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str annotation: The annotation being parsed
        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.line_number, line, position,
                                           annotation)

    def handle_annotation_owl(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS OWL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        url = tokens['url']
        self.annotation_dict[
            annotation] = self.manager.get_annotation_owl_terms(
                url, annotation)
        self.annotation_owl_dict[annotation] = url

        return tokens

    def handle_annotations_url(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS URL "Y"`` by
        fetching the annotation's entries and recording its source URL

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        name = tokens['name']
        self.raise_for_redefined_annotation(line, position, name)

        url = tokens['url']
        entries = self.manager.get_annotation_entries(url)
        self.annotation_dict[name] = entries
        self.annotation_url_dict[name] = url

        return tokens

    def handle_annotation_list(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``
        by storing the deduplicated values and marking the annotation as
        locally-defined

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        name = tokens['name']
        self.raise_for_redefined_annotation(line, position, name)

        # Duplicates in the list are collapsed
        self.annotation_dict[name] = set(tokens['values'])
        self.annotation_lists.add(name)

        return tokens

    def handle_annotation_pattern(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS PATTERN "Y"`` by
        registering the regular expression under the annotation keyword

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        name = tokens['name']
        self.raise_for_redefined_annotation(line, position, name)

        self.annotation_regex[name] = tokens['value']

        return tokens

    def has_enumerated_annotation(self, annotation):
        """Checks if this annotation is defined by an enumeration

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        enumerated = annotation in self.annotation_dict
        return enumerated

    def has_regex_annotation(self, annotation):
        """Checks if this annotation is defined by a regular expression

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        pattern_defined = annotation in self.annotation_regex
        return pattern_defined

    def has_annotation(self, annotation):
        """Checks if this annotation is defined, either by an enumeration or
        by a regular expression

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        if self.has_enumerated_annotation(annotation):
            return True
        return self.has_regex_annotation(annotation)

    def has_enumerated_namespace(self, namespace):
        """Checks if this namespace is defined by an enumeration

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        enumerated = namespace in self.namespace_dict
        return enumerated

    def has_regex_namespace(self, namespace):
        """Checks if this namespace is defined by a regular expression

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        pattern_defined = namespace in self.namespace_regex
        return pattern_defined

    def has_namespace(self, namespace):
        """Checks if this namespace is defined, either by an enumeration or
        by a regular expression

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        if self.has_enumerated_namespace(namespace):
            return True
        return self.has_regex_namespace(namespace)

    def raise_for_version(self, line, position, version):
        """Checks that a version string is valid for BEL documents, meaning
        it's either in the YYYYMMDD or semantic version format

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str version: A version string
        :raises: VersionFormatWarning
        """
        # Accept either a date-formatted version or a semantic version;
        # anything else is a format warning.
        if valid_date_version(version):
            return
        if SEMANTIC_VERSION_STRING_RE.match(version):
            return
        raise VersionFormatWarning(self.line_number, line, position, version)