def __init__(self, parse_method=None, precedence=9): gt_lt_e = Literal("<") ^ Literal("<=") ^ Literal(">") ^ Literal(">=") And.__init__(self, gt_lt_e + Integer()) BaseType.__init__(self, precedence) if parse_method: self.addParseAction(parse_method)
def __init__(self, parse_method=None, precedence=9): gt_lt_e = Literal('<') ^ Literal("<=") ^ Literal('>') ^ Literal(">=") And.__init__(self, [gt_lt_e + Integer()]) BaseType.__init__(self, precedence) if parse_method: self.addParseAction(parse_method)
def load_symbols_file(self, file):
    file = self.xkb_basedir / file
    try:
        return self.loaded[file]
    except KeyError:
        pass

    sections = []

    def quoted(name):
        return QuotedString(quoteChar='"', unquoteResults=True)

    # Callback, toks[0] is "foo" for xkb_symbols "foo"
    def new_symbols_section(name, loc, toks):
        assert len(toks) == 1
        sections.append(XkbSymbols(file, toks[0]))

    # Callback, toks[0] is "foo(bar)" for include "foo(bar)"
    def append_includes(name, loc, toks):
        assert len(toks) == 1
        sections[-1].includes.append(toks[0])

    EOL = LineEnd().suppress()
    SECTIONTYPE = (
        "default", "partial", "hidden",
        "alphanumeric_keys", "modifier_keys", "keypad_keys",
        "function_keys", "alternate_group",
    )
    NAME = quoted("name").setParseAction(new_symbols_section)
    INCLUDE = (lit("include")
               + quoted("include").setParseAction(append_includes)
               + EOL)
    # We only care about includes
    OTHERLINE = And([~lit("};"), ~lit("include") + Regex(".*")]) + EOL

    with open(file) as fd:
        types = OneOrMore(oneOf(SECTIONTYPE)).suppress()
        include_or_other = Or([INCLUDE, OTHERLINE.suppress()])
        section = (types + lit("xkb_symbols") + NAME + lit("{")
                   + OneOrMore(include_or_other) + lit("};"))
        grammar = OneOrMore(section)
        grammar.ignore(cppStyleComment)
        try:
            result = grammar.parseFile(fd)
        except ParseException as e:
            raise XkbLoader.XkbParserException(str(e))

        self.loaded[file] = sections
        return sections
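# A hypothetical input fragment that the grammar above accepts; only the
# include lines are recorded on the resulting XkbSymbols sections:
#
#   partial alphanumeric_keys
#   xkb_symbols "basic" {
#       include "us(euro)"
#       key <AE01> { [ 1, exclam ] };
#   };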
def __init__(self, range_parse_method=None, item_parse_method=None,
             range_symbol='..', precedence=10):
    And.__init__(
        self,
        Integer(item_parse_method) + Literal(range_symbol) +
        Integer(item_parse_method))
    BaseType.__init__(self, precedence)
    if range_parse_method:
        self.addParseAction(range_parse_method)
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative): elementList = [] for element in antlrAlternative.elements: rule = None if hasattr(element.atom, "c1") and element.atom.c1 != "": regex = r"[" + str(element.atom.c1[0]) + "-" + str(element.atom.c2[0] + "]") rule = Regex(regex)("anonymous_regex") elif hasattr(element, "block") and element.block != "": rule = __antlrAlternativesConverter(pyparsingRules, element.block) else: ruleRef = element.atom[0] assert ruleRef in pyparsingRules rule = pyparsingRules[ruleRef](ruleRef) if hasattr(element, "op") and element.op != "": if element.op == "+": rule = Group(OneOrMore(rule))("anonymous_one_or_more") elif element.op == "*": rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more") elif element.op == "?": rule = Optional(rule) else: raise Exception("rule operator not yet implemented : " + element.op) rule = rule elementList.append(rule) if len(elementList) > 1: rule = Group(And(elementList))("anonymous_and") else: rule = elementList[0] assert rule is not None return rule
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
    elementList = []
    for element in antlrAlternative.elements:
        rule = None
        if hasattr(element.atom, 'c1') and element.atom.c1 != '':
            # Character range, e.g. 'a'..'z' becomes the regex [a-z].
            # (The closing ']' was misplaced inside str() in the original.)
            regex = r'[' + str(element.atom.c1[0]) + '-' + str(element.atom.c2[0]) + ']'
            rule = Regex(regex)("anonymous_regex")
        elif hasattr(element, 'block') and element.block != '':
            rule = __antlrAlternativesConverter(pyparsingRules, element.block)
        else:
            ruleRef = element.atom
            assert ruleRef in pyparsingRules
            rule = pyparsingRules[ruleRef](ruleRef)
        if hasattr(element, 'op') and element.op != '':
            if element.op == '+':
                rule = Group(OneOrMore(rule))("anonymous_one_or_more")
            elif element.op == '*':
                rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
            elif element.op == '?':
                rule = Optional(rule)
            else:
                raise Exception('rule operator not yet implemented : ' + element.op)
        elementList.append(rule)
    if len(elementList) > 1:
        rule = Group(And(elementList))("anonymous_and")
    else:
        rule = elementList[0]
    assert rule is not None
    return rule
def __init__(self):
    self.continued = False

    ds9_attr_parser = get_ds9_attr_parser()

    ds9_shape_in_comment_defs = dict(
        text=wcs_shape(CoordOdd, CoordEven),
        vector=wcs_shape(CoordOdd, CoordEven, Distance, Angle),
        composite=wcs_shape(CoordOdd, CoordEven, Angle),
        projection=wcs_shape(CoordOdd, CoordEven,
                             CoordOdd, CoordEven, Distance),
        segment=wcs_shape(CoordOdd, CoordEven, repeat=(0, 2)),
    )
    regionShape = define_shape_helper(ds9_shape_in_comment_defs)
    regionShape = regionShape.setParseAction(
        lambda s, l, tok: Shape(tok[0], tok[1:]))

    self.parser_default = ds9_attr_parser

    cont = CaselessKeyword("||").setParseAction(self.set_continued).suppress()

    line = Optional(And([regionShape, Optional(cont)])) + ds9_attr_parser

    self.parser_with_shape = line
def nest(*content):
    """Define a delimited list by enumerating each element of the list."""
    if len(content) == 0:
        raise ValueError('no arguments supplied')
    return And([LPF, content[0]]
               + list(itt.chain.from_iterable(zip(itt.repeat(C), content[1:])))
               + [RPF])
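# A self-contained sketch of what nest() produces, assuming LPF, C, and RPF
# are suppressed "(", ",", and ")" tokens (their real definitions live
# elsewhere in this module):
import itertools as itt
from pyparsing import And, Suppress, Word, alphas, nums

LPF, C, RPF = Suppress('('), Suppress(','), Suppress(')')

def nest(*content):
    if len(content) == 0:
        raise ValueError('no arguments supplied')
    return And([LPF, content[0]]
               + list(itt.chain.from_iterable(zip(itt.repeat(C), content[1:])))
               + [RPF])

print(nest(Word(alphas), Word(nums)).parseString('(abc, 123)'))  # -> ['abc', '123']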
def build(parsers: dict):
    comma = Literal(",")
    rb = Literal(")")
    lb = Literal("(")
    srb = Literal("]")
    slb = Literal("[")
    number = Regex(r"0|[1-9][0-9]*")
    string = quotedString()
    name = Word(alphanums)
    label = Keyword(STRONG) | Keyword(WEAK) | Literal(SHORT_WEAK)
    param = Combine(Keyword(PARAM) + slb + number + srb)
    marker = (Keyword(RESULT) | Keyword(TRUE) | Keyword(FALSE)
              | Keyword(THIS) | Keyword(_THIS) | param)
    function = Keyword(GET)
    get = Literal(GETATTR)
    operator1 = Literal(MUL) | Literal(DIV) | Literal(MOD)
    operator2 = Literal(ADD) | Literal(SUB)
    operator3 = Literal(EQUAL) | Literal(NOT_EQUAL)
    operator3 |= And(Keyword(word) for word in IS_NOT.split(" ")) | Keyword(IS)
    operator4 = (Literal(GREATER_OR_EQUAL) | Literal(GREATER)
                 | Literal(LOWER_OR_EQUAL) | Literal(LOWER))
    operator5 = Keyword(AND)
    operator6 = Keyword(OR)
    operator7 = Keyword(FOLLOW)

    expression = Forward()
    string_st = string.setParseAction(parsers[STRING])
    name_st = name.setParseAction(parsers[STRING])
    marker_st = marker.setParseAction(parsers[MARKER])
    tuple_st = expression + ZeroOrMore(comma + expression)
    round_invocation_st = (lb + Optional(tuple_st) + rb).setParseAction(
        parsers[INVOCATION])
    function_st = (function + Suppress(round_invocation_st)).setParseAction(
        parsers[FUNCTION])
    getattr_st = (marker_st | name_st) + OneOrMore(
        (get + Suppress(name_st)).setParseAction(parsers[OPERATOR]))
    atom_st = (lb + expression + rb) | function_st | string_st | getattr_st | marker_st
    # Each level of the chain binds one precedence tier, tightest first
    operator_st = atom_st + ZeroOrMore(
        (operator1 + Suppress(atom_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator2 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator3 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator4 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator5 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator6 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    operator_st = operator_st + ZeroOrMore(
        (operator7 + Suppress(operator_st)).setParseAction(parsers[OPERATOR]))
    expression << operator_st
    # Note: enablePackrat() acts globally on all pyparsing parsers, even
    # though it is invoked through a single element here.
    getattr_st.enablePackrat()

    statement = (Optional(label, STRONG)
                 + Suppress(expression)).setParseAction(parsers[LABEL])
    return ZeroOrMore(statement) + StringEnd()
def get_fragment_language() -> ParserElement:
    """Build a protein fragment parser."""
    _fragment_value_inner = fragment_range | missing_fragment(FRAGMENT_MISSING)
    _fragment_value = _fragment_value_inner | And(
        [Suppress('"'), _fragment_value_inner, Suppress('"')])
    parser_element = fragment_tag + nest(
        _fragment_value + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))
    return parser_element
def define_shape(name, shape_args, args_repeat=None):
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    comma = Literal(",").suppress()

    shape_name = CaselessKeyword(name)

    if args_repeat is None:
        shape_with_parens = And(
            [shape_name, lparen, as_comma_separated_list(shape_args), rparen])
        shape_with_spaces = shape_name + And(shape_args)
    else:
        n1, n2 = args_repeat
        sl = []
        ss = shape_args[:n1]
        if ss:
            sl.append(as_comma_separated_list(ss))
        ss = shape_args[n1:n2]
        if ss:
            ar = as_comma_separated_list(ss)
            if sl:
                sl.extend([comma + ar, ZeroOrMore(comma + ar)])
            else:
                sl.extend([ar, ZeroOrMore(comma + ar)])
        ss = shape_args[n2:]
        if ss:
            if sl:
                sl.extend([comma, as_comma_separated_list(ss)])
            else:
                sl.extend([as_comma_separated_list(ss)])
        sl = [shape_name, lparen] + sl + [rparen]
        shape_with_parens = And(sl)
        shape_with_spaces = shape_name + OneOrMore(And(shape_args))

    return (shape_with_parens | shape_with_spaces)
def as_comma_separated_list(al):
    l = [al[0]]
    comma = Literal(",").suppress()
    for a1 in al[1:]:
        l.append(comma)
        l.append(a1)
    return And(l)
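# A quick check of the helper above (pyparsing 2.x assumed); the commas are
# suppressed, so only the list items appear in the result:
from pyparsing import Word, nums

coord = Word(nums)
print(as_comma_separated_list([coord, coord, coord]).parseString("10,20,30"))
# -> ['10', '20', '30']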
def get_fragment_language():
    _fragment_value_inner = fragment_range | missing_fragment(FRAGMENT_MISSING)
    _fragment_value = (_fragment_value_inner | And(
        [Suppress('"'), _fragment_value_inner, Suppress('"')]))
    language = fragment_tag + nest(
        _fragment_value + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))
    return language
def _parse_ascii_data(self, header_tokens, file_mmap, buffer_offset,
                      aggregate_data_types):
    """
    Parse the data portion of a PLY file assuming it uses ASCII format.

    :param header_tokens:
    :param file_mmap:
    :param buffer_offset:
    :param aggregate_data_types:
    :return:
    """
    # Define the grammar of the body
    number = pyparsing_common.number()
    body_expr = list()
    for element in header_tokens.elements:
        sequences = list()
        for prop in element.properties:
            for variable in prop:
                if "index_type" in variable:
                    sequences.append(countedArray(number))
                else:
                    sequences.append(number(variable.name))
        element_data = Group(And(sequences))
        body_expr.append(Group(element_data * element.count)(element.name))
    ascii_grammar = And(body_expr)

    # Tokenize the body data
    body_tokens = ascii_grammar.parseString(
        file_mmap[buffer_offset:].decode("ascii"), parseAll=True)

    # Convert the data to arrays.
    element_data = dict()
    for name, dtype in aggregate_data_types.items():
        element_data[name] = array.array(dtype, self._flatten(body_tokens[name]))
    return element_data
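# countedArray() handles PLY list properties (e.g. the vertex indices of a
# face): it reads the leading count and then matches exactly that many
# numbers. A standalone illustration:
from pyparsing import countedArray, pyparsing_common

face_indices = countedArray(pyparsing_common.number())
print(face_indices.parseString("3 0 1 2"))  # the count is suppressed; the three indices come back grouped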
def make_date_parser():
    date_expr = ppc.iso8601_date.copy()
    date_expr.setParseAction(ppc.convertToDate())

    expr_last = And(
        CaselessKeyword('LAST')
        & ppc.integer.setResultsName('n')
        & StringEnd()).setResultsName('interval').setParseAction(handle_last)

    expr_prev = And(
        CaselessKeyword('PREVIOUS')
        & Or(CaselessKeyword('DAY').setResultsName('day')
             | CaselessKeyword('WEEK').setResultsName('week')
             | CaselessKeyword('MONTH').setResultsName('month'))
        + StringEnd()).setResultsName('interval').setParseAction(handle_previous)

    expr_fromto_date = And(
        CaselessKeyword('FROM')
        + date_expr.setResultsName('start')
        + CaselessKeyword('TO')
        + date_expr.setResultsName('end')
        + StringEnd()).setResultsName('interval').setParseAction(handle_fromto)

    parser = expr_fromto_date | expr_last | expr_prev
    return parser
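# Hypothetical usage; handle_last, handle_previous, and handle_fromto are the
# caller-supplied parse actions that turn the named tokens into date ranges:
parser = make_date_parser()
parser.parseString('LAST 7')                         # handle_last sees n=7
parser.parseString('FROM 2021-01-01 TO 2021-02-01')  # start/end arrive as datetime.date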
def __init__(self):
    RegionPusher.__init__(self)

    self.shape_definition = ds9_shape_defs
    regionShape = define_shape_helper(self.shape_definition)
    regionShape = regionShape.setParseAction(
        lambda s, l, tok: Shape(tok[0], tok[1:]))

    regionExpr = define_expr(
        regionShape,
        negate_func=lambda s, l, tok: tok[-1].set_exclude(),
    )

    coord_command_keys = [
        'PHYSICAL', 'IMAGE', 'FK4', 'B1950', 'FK5', 'J2000', 'GALACTIC',
        'ECLIPTIC', 'ICRS', 'LINEAR', 'AMPLIFIER', 'DETECTOR'
    ]
    coordCommandLiterals = define_simple_literals(coord_command_keys)
    coordCommandWCS = Combine(CaselessLiteral("WCS") + Optional(Word(alphas)))

    coordCommand = (coordCommandLiterals | coordCommandWCS)
    coordCommand.setParseAction(lambda s, l, tok: CoordCommand(tok[-1]))

    regionGlobal = comment_shell_like(CaselessKeyword("global"),
                                      lambda s, l, tok: Global(tok[-1]))

    regionAtom = (regionExpr | coordCommand | regionGlobal)
    regionAtom = regionAtom.setParseAction(self.pushAtom)

    regionComment = comment_shell_like(Literal("#"),
                                       parseAction=self.pushComment)

    line_simple = define_line(atom=regionAtom,
                              separator=Literal(";"),
                              comment=regionComment)

    line_w_composite = And([
        regionAtom,
        CaselessKeyword("||").setParseAction(self.set_continued)
    ]) + Optional(regionComment)

    line = Or([line_simple, line_w_composite])

    self.parser = Optional(line) + StringEnd()
def __init__(self):
    self.fragment_range = (ppc.integer | '?')(FRAGMENT_START) + '_' + (
        ppc.integer | '?' | '*')(FRAGMENT_STOP)
    self.missing_fragment = Keyword('?')(FRAGMENT_MISSING)
    self._fragment_value_inner = self.fragment_range | self.missing_fragment(
        FRAGMENT_MISSING)
    self._fragment_value = (self._fragment_value_inner | And(
        [Suppress('"'), self._fragment_value_inner, Suppress('"')]))
    self.language = fragment_tag + nest(
        self._fragment_value + Optional(WCW + quote(FRAGMENT_DESCRIPTION)))
    super(FragmentParser, self).__init__(self.language)
def __init__(self):
    self.continued = False

    ds9_attr_parser = get_ds9_attr_parser()

    regionShape = define_shape_helper(ds9_shape_in_comment_defs)
    regionShape = regionShape.setParseAction(
        lambda s, l, tok: Shape(tok[0], tok[1:]))

    self.parser_default = ds9_attr_parser

    cont = CaselessKeyword("||").setParseAction(
        self.set_continued).suppress()

    line = Optional(And([regionShape, Optional(cont)])) + ds9_attr_parser

    self.parser_with_shape = line
def parse_value_comparison(op_string):
    """Parse a comparison such as: 10, <=10, <10, >=10, >10."""
    num = Word(nums + ".")  # matches 1 or 1.1
    ltgt = oneOf("< <= > >=")
    parser = num | And([ltgt, num])
    try:
        return parser.parseString(op_string).asList()
    except ParseException:
        # TODO: what should really happen here?
        raise ValueError("'%s' is not a valid operation" % op_string)
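# Quick sanity checks for parse_value_comparison (same pyparsing imports as
# above):
print(parse_value_comparison("10"))    # -> ['10']
print(parse_value_comparison("<=10"))  # -> ['<=', '10']
print(parse_value_comparison(">1.5"))  # -> ['>', '1.5']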
def _aggregate_property(cls, name, prefix, *keywords):
    """
    Create a property group from the specified name, the pattern prefix
    (a ParserElement instance), and an iterable of keywords.

    Example: aggregate_property('position', CaselessKeyword('property'),
    *[CaselessKeyword('x'), ...])

    :param name:
    :param prefix:
    :param keywords:
    :return:
    """
    aggregates = list()
    for keyword in keywords:
        aggregates.append(Group(prefix + keyword("name")))
    return Group(And(aggregates))(name)
def __init__(self): RegionPusher.__init__(self) self.shape_definition = ds9_shape_defs regionShape = define_shape_helper(self.shape_definition) regionShape = regionShape.setParseAction( lambda s, l, tok: Shape(tok[0], tok[1:])) regionExpr = define_expr( regionShape, negate_func=lambda s, l, tok: tok[-1].set_exclude(), ) coord_command_keys = "PHYSICAL IMAGE FK4 B1950 FK5 J2000 GALACTIC ECLIPTIC ICRS LINEAR AMPLIFIER DETECTOR".split( ) coordCommand = define_simple_literals( coord_command_keys, parseAction=lambda s, l, tok: CoordCommand(tok[-1])) regionGlobal = comment_shell_like(CaselessKeyword("global"), lambda s, l, tok: Global(tok[-1])) regionAtom = (regionExpr | coordCommand | regionGlobal) regionAtom = regionAtom.setParseAction(self.pushAtom) regionComment = comment_shell_like(Literal("#"), parseAction=self.pushComment) line_simple = define_line(atom=regionAtom, separator=Literal(";"), comment=regionComment) line_w_composite = And([regionAtom, CaselessKeyword("||").setParseAction(self.set_continued) ]) \ + Optional(regionComment) line = Or([line_simple, line_w_composite]) self.parser = Optional(line) + StringEnd()
def __init__(
    self,
    annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
    annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
    citation_clearing: bool = True,
    required_annotations: Optional[List[str]] = None,
) -> None:
    """Initialize the control statement parser.

    :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
    :param annotation_to_pattern: A dictionary of {annotation: regular expression string}
    :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
    :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
    :param required_annotations: Annotations that are required
    """
    self.citation_clearing = citation_clearing

    self.annotation_to_term = annotation_to_term or {}
    self.annotation_to_pattern = annotation_to_pattern or {}
    self.annotation_to_local = annotation_to_local or {}

    self.statement_group = None
    self.citation_db = None
    self.citation_db_id = None
    self.evidence = None
    self.annotations = {}
    self.required_annotations = required_annotations or []

    annotation_key = ppc.identifier('key').setParseAction(
        self.handle_annotation_key)

    self.set_statement_group = set_statement_group_stub().setParseAction(
        self.handle_set_statement_group)

    self.set_citation = set_citation_stub.setParseAction(
        self.handle_set_citation)

    self.set_evidence = set_evidence_stub.setParseAction(
        self.handle_set_evidence)

    set_command_prefix = And([annotation_key('key'), Suppress('=')])
    self.set_command = set_command_prefix + qid('value')
    self.set_command.setParseAction(self.handle_set_command)

    self.set_command_list = set_command_prefix + delimited_quoted_list('values')
    self.set_command_list.setParseAction(self.handle_set_command_list)

    self.unset_command = annotation_key('key')
    self.unset_command.addParseAction(self.handle_unset_command)

    self.unset_evidence = supporting_text_tags(EVIDENCE)
    self.unset_evidence.setParseAction(self.handle_unset_evidence)

    self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
    self.unset_citation.setParseAction(self.handle_unset_citation)

    self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
    self.unset_statement_group.setParseAction(
        self.handle_unset_statement_group)

    self.unset_list = delimited_unquoted_list('values')
    self.unset_list.setParseAction(self.handle_unset_list)

    self.unset_all = unset_all.setParseAction(self.handle_unset_all)

    self.set_statements = set_tag + MatchFirst([
        self.set_statement_group,
        self.set_citation,
        self.set_evidence,
        self.set_command,
        self.set_command_list,
    ])

    self.unset_statements = unset_tag + MatchFirst([
        self.unset_all,
        self.unset_citation,
        self.unset_evidence,
        self.unset_statement_group,
        self.unset_command,
        self.unset_list,
    ])

    self.language = self.set_statements | self.unset_statements

    super(ControlParser, self).__init__(self.language)
def __init__(
    self,
    manager,
    namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
    namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
    annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
    default_namespace: Optional[Set[str]] = None,
    allow_redefinition: bool = False,
    skip_validation: bool = False,
    upgrade_urls: bool = False,
) -> None:
    """Build a metadata parser.

    :param manager: A cache manager
    :param namespace_to_term_to_encoding: An enumerated namespace mapping from
     {namespace keyword: {(identifier, name): encoding}}
    :param namespace_to_pattern: A regular expression namespace mapping from {namespace keyword: regex string}
    :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values}
    :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string}
    :param default_namespace: A set of strings that can be used without a namespace
    :param skip_validation: If true, don't download and cache namespaces/annotations
    """
    #: This metadata parser's internal definition cache manager
    self.manager = manager
    self.disallow_redefinition = not allow_redefinition
    self.skip_validation = skip_validation
    self.upgrade_urls = upgrade_urls

    #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
    self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
    #: A set of namespaces' URLs that can't be cached
    self.uncachable_namespaces = set()
    #: A dictionary of {namespace keyword: regular expression string}
    self.namespace_to_pattern = namespace_to_pattern or {}
    #: A set of names that can be used without a namespace
    self.default_namespace = set(default_namespace) if default_namespace is not None else None

    #: A dictionary of cached {annotation keyword: set of values}
    self.annotation_to_term = annotation_to_term or {}
    #: A dictionary of {annotation keyword: regular expression string}
    self.annotation_to_pattern = annotation_to_pattern or {}
    #: A dictionary of cached {annotation keyword: set of values}
    self.annotation_to_local = annotation_to_local or {}

    #: A dictionary containing the document metadata
    self.document_metadata = {}

    #: A dictionary from {namespace keyword: BEL namespace URL}
    self.namespace_url_dict = {}
    #: A dictionary from {annotation keyword: BEL annotation URL}
    self.annotation_url_dict = {}

    self.document = And([
        set_tag,
        Suppress(BEL_KEYWORD_DOCUMENT),
        word('key'),
        Suppress('='),
        qid('value'),
    ])

    namespace_tag = And([
        define_tag,
        Suppress(BEL_KEYWORD_NAMESPACE),
        ppc.identifier('name'),
        as_tag,
    ])
    self.namespace_url = And([namespace_tag, url_tag, quote('url')])
    self.namespace_pattern = And(
        [namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    annotation_tag = And([
        define_tag,
        Suppress(BEL_KEYWORD_ANNOTATION),
        ppc.identifier('name'),
        as_tag,
    ])
    self.annotation_url = And([annotation_tag, url_tag, quote('url')])
    self.annotation_list = And(
        [annotation_tag, list_tag, delimited_quoted_list('values')])
    self.annotation_pattern = And(
        [annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    self.document.setParseAction(self.handle_document)
    self.namespace_url.setParseAction(self.handle_namespace_url)
    self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
    self.annotation_url.setParseAction(self.handle_annotations_url)
    self.annotation_list.setParseAction(self.handle_annotation_list)
    self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

    self.language = MatchFirst([
        self.document,
        self.namespace_url,
        self.annotation_url,
        self.annotation_list,
        self.annotation_pattern,
        self.namespace_pattern,
    ]).setName('BEL Metadata')

    super(MetadataParser, self).__init__(self.language)
    EVIDENCE,
)

from ..utils import citation_dict

__all__ = ['ControlParser']

logger = logging.getLogger(__name__)

set_tag = Suppress(BEL_KEYWORD_SET)
unset_tag = Suppress(BEL_KEYWORD_UNSET)
unset_all = Suppress(BEL_KEYWORD_ALL)

supporting_text_tags = oneOf([BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT])

set_statement_group_stub = And(
    [Suppress(BEL_KEYWORD_STATEMENT_GROUP), Suppress('='), qid('group')])
set_citation_stub = And([
    Suppress(BEL_KEYWORD_CITATION), Suppress('='),
    delimited_quoted_list('values')
])
set_evidence_stub = And(
    [Suppress(supporting_text_tags), Suppress('='), quote('value')])


class ControlParser(BaseParser):
    """A parser for BEL control statements.
def create_grammar(container_ids, secret_ids):
    """Create the grammar for the editfile."""
    # ParseException was missing from this import list even though the parse
    # actions below raise it.
    from pyparsing import (nums, alphas, lineEnd, stringEnd, OneOrMore,
                           ZeroOrMore, SkipTo, Optional, And, Word,
                           CharsNotIn, Empty, QuotedString, Literal,
                           Suppress, Group, Combine, originalTextFor,
                           Forward, ParserElement, ParseException)

    # Read from bottom to top
    whiteSpaceChars = ' \t'
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)
    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        # pyparsing bug #68.
        # https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        v = int(tokens[0])
        if v not in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v

    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        v = int(tokens[0])
        if v not in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v

    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note))
             - Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment

    # Instead of the following recursive grammar, we could have simply used
    #
    #   containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                       + Group(OneOrMore(line)))
    #   multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd
                                 | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (stringEnd
                                 | (containerLine | line) + _multipleContainers_body)
    _multipleContainers_entry = And([entry])
    multipleContainers = And([_multipleContainers_head])

    # TODO ibidem below
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None),):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret

    multipleContainers.setParseAction(multipleContainersParseAction)

    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])

    grammar = multipleContainers | oneContainer
    return grammar
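# A hypothetical editfile accepted by this grammar: one entry per line
# (key, secret, optional note), '#' starts a comment or a secret-id
# reference, and CONTAINER lines switch containers:
#
#   CONTAINER 1
#   example.com  "s3cret pass"  "main account"
#   db-root      #2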
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""

    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, disallow translocations
         without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence
         and all annotations? Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(
                Group(self.single_abundance | self.complex_singleton))(MEMBERS)
            + opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function

        # backwards compatibility with BEL v1.0
        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.identifier_parser.namespace_to_terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.identifier_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int,
                               tokens: ParseResults):
        """Handle nested statements.

        If :code:`allow_nested` is False, raises a ``NestedRelationWarning``.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            })

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            })
        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and tokens[NAMESPACE] == DIRTY:
            # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings.get(k, set())
                for k in self._namespace_dict[namespace][name]))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.get_line_number(), line,
                                          position, tokens[FUNCTION],
                                          namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults,
                            relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_dsl, child_node_dsl,
                                            relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, _, __,
                              tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier) -> str:
        """Add a qualified edge from the internal aspects of the parser."""
        return self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.get_line_number()})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier) -> str:
        """Add an edge, then add the opposite-direction edge if the relation is two-way."""
        sha512 = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

        return sha512

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        annotations = {
            annotation_name:
            ({ae: True for ae in annotation_entry}
             if isinstance(annotation_entry, set) else
             {annotation_entry: True})
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line,
                                        position)

        missing_required_annotations = \
            self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position,
                                           missing_required_annotations)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, _, __,
                                    tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl,
                                        relation)
        return tokens

    def handle_label_relation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) label "Label for X"``.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                node=self.graph.node,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into a canonical node name and make sure it's in the graph."""
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Handle a malformed translocation."""
        raise MalformedTranslocationWarning(self.get_line_number(), line,
                                            position, tokens)
def __init__(self, parse_method=None, field_separator=":", precedence=11): And.__init__(self, FieldName() + Literal(field_separator)) BaseType.__init__(self, precedence) if parse_method: self.addParseAction(parse_method)
def __init__(self, range_parse_method=None, item_parse_method=None, range_symbol="..", precedence=10):
    And.__init__(
        self,
        Integer(item_parse_method) + Literal(range_symbol) + Integer(item_parse_method))
    BaseType.__init__(self, precedence)
    if range_parse_method:
        self.addParseAction(range_parse_method)
def __init__(self, parse_method=None, precedence=11):
    And.__init__(self, [QuotedString() + Literal('~') + Integer()])
    BaseType.__init__(self, precedence)
    if parse_method:
        self.addParseAction(parse_method)
def __init__(self, parse_method=None, field_separator=':', precedence=11):
    And.__init__(self, [FieldName() + Literal(field_separator)])
    BaseType.__init__(self, precedence)
    if parse_method:
        self.addParseAction(parse_method)
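# The constructors above share one pattern: a query-language type that
# inherits from both pyparsing's And (for matching) and a library-specific
# BaseType (which carries a precedence for the surrounding expression
# grammar). The ``QuotedString() + Literal('~') + Integer()`` variant, for
# instance, matches Lucene-style proximity expressions like ``"a phrase"~3``.
# Below is a minimal runnable sketch of the pattern, with stand-in BaseType,
# Integer, and FieldName classes (assumptions -- the real library's
# definitions are not shown in these snippets):
from pyparsing import And, Literal, Word, alphas, nums


class BaseType(object):
    """Stand-in for the precedence-carrying base class."""

    def __init__(self, precedence):
        self.precedence = precedence


class Integer(Word):
    def __init__(self, parse_method=None):
        Word.__init__(self, nums)
        if parse_method:
            self.addParseAction(parse_method)


class FieldName(Word):
    def __init__(self):
        Word.__init__(self, alphas)


class FieldPrefix(And, BaseType):
    """Matches a field prefix such as ``title:``."""

    def __init__(self, parse_method=None, field_separator=':', precedence=11):
        And.__init__(self, [FieldName() + Literal(field_separator)])
        BaseType.__init__(self, precedence)
        if parse_method:
            self.addParseAction(parse_method)


print(FieldPrefix().parseString('title:'))  # -> ['title', ':']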
    else:
        message += crlf
    message.leaveWhitespace()

build_message()
dispatcher.connect(build_message, 'parser.trailing_spaces', 'config')
dispatcher.connect(build_message, 'parser.soft_eol', 'config')

chanstring = charclass((0x01, 0x06), (0x08, 0x09), (0x0B, 0x0C),
                       (0x0E, 0x1F), (0x21, 0x2B), (0x2D, 0x39),
                       (0x3B, 0xFF))
channelid = 5 * (charclass((0x41, 0x5A)) ^ digit)
channel = And([
    Or([oneOf('# + &'), Literal('!') + Group(channelid)]),
    Group(OneOrMore(chanstring)),
    Optional(Suppress(Literal(':')) + Group(OneOrMore(chanstring))),
])

###
# Wildcard expressions
###
wildone = Literal('?')
wildmany = Literal('*')
nowild = charclass((0x01, 0x29), (0x2B, 0x3E), (0x40, 0xFF))
noesc = charclass((0x01, 0x5B), (0x5D, 0xFF))
mask = (Optional(wildone ^ wildmany) +
        ZeroOrMore(nowild ^ (noesc + wildone) ^ (noesc + wildmany)))

# Fall back to regex for parsing wildcards
matchone = '[%s-%s]' % (chr(0x01), chr(0xFF))
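# ``charclass`` is not defined in this snippet; it presumably builds a
# single-character pyparsing expression from (low, high) codepoint ranges,
# mirroring the character classes in RFC 2812's ABNF. A hypothetical
# stand-in using Regex, plus a sketch of the wildcard-to-regex fallback the
# last lines hint at (both are assumptions, not the module's definitions):
import re

from pyparsing import Regex


def charclass(*ranges):
    body = ''.join('%s-%s' % (re.escape(chr(lo)), re.escape(chr(hi)))
                   for lo, hi in ranges)
    return Regex('[%s]' % body)


def mask_to_regex(mask):
    """Translate an IRC wildcard mask (? = one char, * = many) to a regex."""
    matchone = '[%s-%s]' % (chr(0x01), chr(0xFF))
    parts = (matchone if c == '?' else matchone + '*' if c == '*' else re.escape(c)
             for c in mask)
    return re.compile(''.join(parts) + '$')


print(bool(mask_to_regex('nick*!*@*').match('nickname!user@host')))  # True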
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso:: BEL 1.0 Specification for the
        `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_ keyword
    """

    def __init__(self, manager, namespace_dict=None, annotation_dict=None, namespace_regex=None,
                 annotation_regex=None, default_namespace=None, allow_redefinition=False):
        """
        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
         {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
         {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
         {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
         {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition

        #: A dictionary of cached {namespace keyword: {name: encoding}}
        self.namespace_dict = {} if namespace_dict is None else namespace_dict
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_dict = {} if annotation_dict is None else annotation_dict
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_regex = {} if namespace_regex is None else namespace_regex
        #: A set of names that can be used without a namespace
        self.default_namespace = set(default_namespace) if default_namespace is not None else None
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_regex = {} if annotation_regex is None else annotation_regex

        #: A set of namespaces' URLs that can't be cached
        self.uncachable_namespaces = set()

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: A dictionary from {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: A set of annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ])

        namespace_tag = And([define_tag, Suppress(BEL_KEYWORD_NAMESPACE), ppc.identifier('name'), as_tag])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_owl = And([namespace_tag, owl_tag, Optional(function_tags('functions')), quote('url')])
        self.namespace_pattern = And([namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        annotation_tag = And([define_tag, Suppress(BEL_KEYWORD_ANNOTATION), ppc.identifier('name'), as_tag])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_owl = And([annotation_tag, owl_tag, quote('url')])
        self.annotation_list = And([annotation_tag, list_tag, delimited_quoted_list('values')])
        self.annotation_pattern = And([annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_owl.setParseAction(self.handle_namespace_owl)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_owl.setParseAction(self.handle_annotation_owl)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        self.language = MatchFirst([
            self.document,
            self.namespace_url,
            self.namespace_owl,
            self.annotation_url,
            self.annotation_list,
            self.annotation_owl,
            self.annotation_pattern,
            self.namespace_pattern,
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line, position, tokens):
        """Handle statements like ``SET DOCUMENT X = "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        key = tokens['key']
        value = tokens['value']

        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.line_number, line, position, key, value)

        norm_key = DOCUMENT_KEYS[key]

        if norm_key in self.document_metadata:
            log.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line, position, namespace):
        """Raise an exception if the given namespace is already defined.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str namespace: The namespace being parsed
        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.line_number, line, position, namespace)

    def handle_namespace_url(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS URL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        url = tokens['url']
        namespace_result = self.manager.ensure_namespace(url)

        if isinstance(namespace_result, dict):
            self.namespace_dict[namespace] = namespace_result
            self.uncachable_namespaces.add(url)
        else:
            self.namespace_dict[namespace] = namespace_result.to_values()

        self.namespace_url_dict[namespace] = url

        return tokens

    def handle_namespace_owl(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS OWL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        functions = str(tokens['functions']) if 'functions' in tokens else BELNS_ENCODING_STR

        url = tokens['url']
        terms = self.manager.get_namespace_owl_terms(url, namespace)

        self.namespace_dict[namespace] = {term: functions for term in terms}
        self.namespace_owl_dict[namespace] = url

        return tokens

    def handle_namespace_pattern(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)
        self.namespace_regex[namespace] = tokens['value']
        return tokens

    def raise_for_redefined_annotation(self, line, position, annotation):
        """Raise an exception if the given annotation is already defined.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str annotation: The annotation being parsed
        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.line_number, line, position, annotation)

    def handle_annotation_owl(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS OWL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        url = tokens['url']
        self.annotation_dict[annotation] = self.manager.get_annotation_owl_terms(url, annotation)
        self.annotation_owl_dict[annotation] = url

        return tokens

    def handle_annotations_url(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS URL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        keyword = tokens['name']
        self.raise_for_redefined_annotation(line, position, keyword)

        url = tokens['url']
        self.annotation_dict[keyword] = self.manager.get_annotation_entries(url)
        self.annotation_url_dict[keyword] = url

        return tokens

    def handle_annotation_list(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS LIST {"Y", "Z", ...}``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        values = set(tokens['values'])

        self.annotation_dict[annotation] = values
        self.annotation_lists.add(annotation)

        return tokens

    def handle_annotation_pattern(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_regex[annotation] = tokens['value']
        return tokens

    def has_enumerated_annotation(self, annotation):
        """Check if this annotation is defined by an enumeration.

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return annotation in self.annotation_dict

    def has_regex_annotation(self, annotation):
        """Check if this annotation is defined by a regular expression.

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return annotation in self.annotation_regex

    def has_annotation(self, annotation):
        """Check if this annotation is defined.

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return self.has_enumerated_annotation(annotation) or self.has_regex_annotation(annotation)

    def has_enumerated_namespace(self, namespace):
        """Check if this namespace is defined by an enumeration.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_dict

    def has_regex_namespace(self, namespace):
        """Check if this namespace is defined by a regular expression.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_regex

    def has_namespace(self, namespace):
        """Check if this namespace is defined.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace)

    def raise_for_version(self, line, position, version):
        """Check that a version string is valid for BEL documents.

        A valid version string is either in YYYYMMDD or semantic version format.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str version: A version string
        :raises: VersionFormatWarning
        """
        if valid_date_version(version):
            return

        if not SEMANTIC_VERSION_STRING_RE.match(version):
            raise VersionFormatWarning(self.line_number, line, position, version)
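# raise_for_version accepts either a YYYYMMDD date or a semantic version
# string. valid_date_version and SEMANTIC_VERSION_STRING_RE are imported by
# the module above; the sketch below reimplements plausible versions of both
# purely for illustration (assumptions, not the library's definitions):
import re
from datetime import datetime

SEMANTIC_VERSION_STRING_RE = re.compile(r'^\d+\.\d+\.\d+$')


def valid_date_version(version):
    """Return True if ``version`` is a valid YYYYMMDD date string."""
    try:
        datetime.strptime(version, '%Y%m%d')
        return True
    except ValueError:
        return False


def is_valid_bel_version(version):
    return valid_date_version(version) or bool(SEMANTIC_VERSION_STRING_RE.match(version))


assert is_valid_bel_version('20180101')   # date form
assert is_valid_bel_version('1.2.3')      # semantic version form
assert not is_valid_bel_version('v1.2')   # neither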