class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""

    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}.
         Delegated to :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures.
         Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param skip_validation: If true, build the control/identifier parsers without any
         namespace or annotation dictionaries, disabling semantic validation.
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            # Validation disabled: the sub-parsers are built without any vocabularies,
            # so names/annotations are accepted syntactically but never checked.
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        # Share this parser's line counter with the sub-parsers so their warnings
        # report the correct line number of the overall document.
        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([self.gene_fusion, self.gene_fusion_legacy, self.gene_modified]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))(VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein, self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function

        # backwards compatibility with BEL v1.0
        molecular_activity_default = oneOf(list(language.activity_labels)).setParseAction(
            handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW + Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)

        self.translocation_unqualified = nest(Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard, self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression, self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst([self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term, MatchFirst(self.bel_to_bel_relations), self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst([self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag, self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag, self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag, self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag,
            self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags, self.abundance)

        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags, self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])
        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation, self.part_of_reaction
        ])
        self.unqualified_relation.setParseAction(self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag, directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT), Suppress('labeled'), quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        # NOTE: this intentionally rebinds ``self.relation`` to wrap the qualified-relation
        # grammar built above together with the list/nested/unqualified/label alternatives.
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language, streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.identifier_parser.namespace_to_terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.identifier_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int, tokens: ParseResults):
        """Handle nested statements.

        If :code:`allow_nested` is False, raises a ``NestedRelationWarning``. Otherwise,
        distributes the nested statement ``A rel (B rel C)`` into the two statements
        ``A rel B`` and ``B rel C``.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[SUBJECT],
            RELATION: tokens[RELATION],
            OBJECT: tokens[OBJECT][SUBJECT],
        })

        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[OBJECT][SUBJECT],
            RELATION: tokens[OBJECT][RELATION],
            OBJECT: tokens[OBJECT][OBJECT],
        })
        return tokens

    def check_function_semantics(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Names matched by a regular-expression namespace carry no encoding, so they
        # can't be semantically checked.
        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and tokens[NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(itt.chain.from_iterable(
            belns_encodings.get(k, set())
            for k in self._namespace_dict[namespace][name]
        ))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position, namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(
                self.get_line_number(), line, position, tokens[FUNCTION], namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults, relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_dsl, child_node_dsl, relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations, subject_modifier, object_modifier) -> str:
        """Add a qualified edge from the internal aspects of the parser."""
        return self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            # copy so later SET/UNSET statements don't mutate the stored citation
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.get_line_number()}
        )

    def _add_qualified_edge(self, u, v, relation, annotations, subject_modifier, object_modifier) -> str:
        """Add an edge, then adds the opposite direction edge if it should."""
        sha512 = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        # Symmetric relations (e.g. correlations) also get the reverse edge, with the
        # subject/object modifiers swapped to match the swapped endpoints.
        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

        return sha512

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        # Normalize each annotation entry (single value or set of values) to a
        # {value: True} mapping, as expected by the graph's edge-annotation format.
        annotations = {
            annotation_name: (
                {ae: True for ae in annotation_entry}
                if isinstance(annotation_entry, set)
                else {annotation_entry: True}
            )
            for annotation_name, annotation_entry in self.control_parser.annotations.items()
        }

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line: str, position: int,
                                 tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line, position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.get_line_number(), line, position, missing_required_annotations)

        self._handle_relation(tokens)

        return tokens

    def handle_unqualified_relation(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl, relation)
        return tokens

    def handle_label_relation(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) label "Label for X"``.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                # FIX: was ``self.graph.node`` (the graph's entire node view); the
                # warning is about the specific node being relabeled.
                node=subject_node_dsl,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into canonical node name and make sure it is in the graph."""
        if MODIFIER in tokens:
            # Modified terms (activity, degradation, ...) wrap the real node in TARGET.
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int, tokens: ParseResults) -> None:
        """Handle a malformed translocation.

        :raises: MalformedTranslocationWarning
        """
        raise MalformedTranslocationWarning(self.get_line_number(), line, position, tokens)
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso::

        BEL 1.0 Specification for the `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_ keyword
    """

    def __init__(self,
                 manager,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 default_namespace=None,
                 allow_redefinition=False):
        """Build a metadata parser.

        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
         {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
         {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
         {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
         {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        :param bool allow_redefinition: If true, allow namespaces and annotations to be redefined without raising
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition
        #: A dictionary of cached {namespace keyword: {name: encoding}}
        self.namespace_dict = {} if namespace_dict is None else namespace_dict
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_dict = {} if annotation_dict is None else annotation_dict
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_regex = {} if namespace_regex is None else namespace_regex
        #: A set of names that can be used without a namespace
        self.default_namespace = set(default_namespace) if default_namespace is not None else None
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_regex = {} if annotation_regex is None else annotation_regex

        #: A set of namespaces's URLs that can't be cached
        self.uncachable_namespaces = set()

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: A dictionary from {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: A set of annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        # ``SET DOCUMENT <key> = "<value>"``
        self.document = And([set_tag, Suppress(BEL_KEYWORD_DOCUMENT), word('key'), Suppress('='), qid('value')])

        # Shared prefix for the three ``DEFINE NAMESPACE <name> AS ...`` forms below
        namespace_tag = And([define_tag, Suppress(BEL_KEYWORD_NAMESPACE), ppc.identifier('name'), as_tag])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_owl = And([namespace_tag, owl_tag, Optional(function_tags('functions')), quote('url')])
        self.namespace_pattern = And([namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        # Shared prefix for the four ``DEFINE ANNOTATION <name> AS ...`` forms below
        annotation_tag = And([define_tag, Suppress(BEL_KEYWORD_ANNOTATION), ppc.identifier('name'), as_tag])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_owl = And([annotation_tag, owl_tag, quote('url')])
        self.annotation_list = And([annotation_tag, list_tag, delimited_quoted_list('values')])
        self.annotation_pattern = And([annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_owl.setParseAction(self.handle_namespace_owl)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_owl.setParseAction(self.handle_annotation_owl)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        self.language = MatchFirst([
            self.document, self.namespace_url, self.namespace_owl, self.annotation_url,
            self.annotation_list, self.annotation_owl, self.annotation_pattern, self.namespace_pattern
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line, position, tokens):
        """Handle statements like ``SET DOCUMENT X = "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidMetadataException
        """
        key = tokens['key']
        value = tokens['value']

        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.line_number, line, position, key, value)

        norm_key = DOCUMENT_KEYS[key]

        # First writer wins; subsequent redefinitions are logged and ignored
        if norm_key in self.document_metadata:
            log.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line, position, namespace):
        """Raise an exception if a namespace is already defined.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str namespace: The namespace being parsed
        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.line_number, line, position, namespace)

    def handle_namespace_url(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS URL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        url = tokens['url']

        namespace_result = self.manager.ensure_namespace(url)

        # A plain dict result signals the namespace could not be cached by the manager
        if isinstance(namespace_result, dict):
            self.namespace_dict[namespace] = namespace_result
            self.uncachable_namespaces.add(url)
        else:
            self.namespace_dict[namespace] = namespace_result.to_values()

        self.namespace_url_dict[namespace] = url

        return tokens

    def handle_namespace_owl(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS OWL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        # Fall back to the default encoding string when no functions were specified
        functions = str(tokens['functions']) if 'functions' in tokens else BELNS_ENCODING_STR

        url = tokens['url']

        terms = self.manager.get_namespace_owl_terms(url, namespace)

        self.namespace_dict[namespace] = {term: functions for term in terms}
        self.namespace_owl_dict[namespace] = url

        return tokens

    def handle_namespace_pattern(self, line, position, tokens):
        """Handle statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)
        self.namespace_regex[namespace] = tokens['value']
        return tokens

    def raise_for_redefined_annotation(self, line, position, annotation):
        """Raise an exception if the given annotation is already defined.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str annotation: The annotation being parsed
        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.line_number, line, position, annotation)

    def handle_annotation_owl(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS OWL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        url = tokens['url']

        self.annotation_dict[annotation] = self.manager.get_annotation_owl_terms(url, annotation)
        self.annotation_owl_dict[annotation] = url

        return tokens

    def handle_annotations_url(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS URL "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        keyword = tokens['name']
        self.raise_for_redefined_annotation(line, position, keyword)

        url = tokens['url']

        self.annotation_dict[keyword] = self.manager.get_annotation_entries(url)
        self.annotation_url_dict[keyword] = url

        return tokens

    def handle_annotation_list(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        values = set(tokens['values'])

        self.annotation_dict[annotation] = values
        # Remember this annotation was defined inline, not from a resource
        self.annotation_lists.add(annotation)

        return tokens

    def handle_annotation_pattern(self, line, position, tokens):
        """Handle statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_regex[annotation] = tokens['value']
        return tokens

    def has_enumerated_annotation(self, annotation):
        """Check if this annotation is defined by an enumeration.

        :param str annotation: The keyword of a annotation
        :rtype: bool
        """
        return annotation in self.annotation_dict

    def has_regex_annotation(self, annotation):
        """Check if this annotation is defined by a regular expression.

        :param str annotation: The keyword of a annotation
        :rtype: bool
        """
        return annotation in self.annotation_regex

    def has_annotation(self, annotation):
        """Check if this annotation is defined.

        :param str annotation: The keyword of a annotation
        :rtype: bool
        """
        return self.has_enumerated_annotation(annotation) or self.has_regex_annotation(annotation)

    def has_enumerated_namespace(self, namespace):
        """Check if this namespace is defined by an enumeration.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_dict

    def has_regex_namespace(self, namespace):
        """Check if this namespace is defined by a regular expression.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_regex

    def has_namespace(self, namespace):
        """Check if this namespace is defined.

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace)

    def raise_for_version(self, line, position, version):
        """Check that a version string is valid for BEL documents.

        Valid means it is either in the YYYYMMDD or semantic version format.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str version: A version string
        :raises: VersionFormatWarning
        """
        if valid_date_version(version):
            return

        if not SEMANTIC_VERSION_STRING_RE.match(version):
            raise VersionFormatWarning(self.line_number, line, position, version)
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso::

        BEL 1.0 Specification for the
        `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_
        keyword
    """

    def __init__(
        self,
        manager,
        namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_redefinition: bool = False,
        skip_validation: bool = False,
        upgrade_urls: bool = False,
    ) -> None:
        """Build a metadata parser.

        :param manager: A cache manager
        :param namespace_to_term_to_encoding: An enumerated namespace mapping from
         {namespace keyword: {(identifier, name): encoding}}
        :param namespace_to_pattern: A regular expression namespace mapping from
         {namespace keyword: regex string}
        :param annotation_to_term: Enumerated annotation mapping from
         {annotation keyword: set of valid values}
        :param annotation_to_pattern: Regular expression annotation mapping from
         {annotation keyword: regex string}
        :param annotation_to_local: Locally-defined annotation mapping from
         {annotation keyword: set of valid values}
        :param default_namespace: A set of strings that can be used without a namespace
        :param allow_redefinition: If true, don't raise when a namespace or annotation keyword
         is defined a second time
        :param skip_validation: If true, don't download and cache namespaces/annotations
        :param upgrade_urls: If true, swap known namespace keywords' URLs for their canonical ones
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition
        self.skip_validation = skip_validation
        self.upgrade_urls = upgrade_urls

        #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
        self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
        #: A set of namespaces's URLs that can't be cached
        self.uncachable_namespaces = set()
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_to_pattern = namespace_to_pattern or {}
        #: A set of names that can be used without a namespace
        self.default_namespace = set(default_namespace) if default_namespace is not None else None

        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_term = annotation_to_term or {}
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_to_pattern = annotation_to_pattern or {}
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_local = annotation_to_local or {}

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}

        # Grammar: SET DOCUMENT <key> = "<value>"
        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ])

        # Grammar: DEFINE NAMESPACE <name> AS (URL "<url>" | PATTERN "<regex>")
        namespace_tag = And([define_tag, Suppress(BEL_KEYWORD_NAMESPACE), ppc.identifier('name'), as_tag])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_pattern = And([namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        # Grammar: DEFINE ANNOTATION <name> AS (URL "<url>" | LIST {...} | PATTERN "<regex>")
        annotation_tag = And([define_tag, Suppress(BEL_KEYWORD_ANNOTATION), ppc.identifier('name'), as_tag])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_list = And([annotation_tag, list_tag, delimited_quoted_list('values')])
        self.annotation_pattern = And([annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

        # Attach each grammar element to its handler
        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        # NOTE: MatchFirst order matters — alternatives are tried in this sequence
        self.language = MatchFirst([
            self.document,
            self.namespace_url,
            self.annotation_url,
            self.annotation_list,
            self.annotation_pattern,
            self.namespace_pattern,
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``SET DOCUMENT X = "Y"``.

        :raises: InvalidMetadataException
        :raises: VersionFormatWarning
        """
        key = tokens['key']
        value = tokens['value']

        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.get_line_number(), line, position, key, value)

        norm_key = DOCUMENT_KEYS[key]

        # First definition wins; later attempts are logged and ignored
        if norm_key in self.document_metadata:
            logger.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line: str, position: int, namespace: str) -> None:
        """Raise an exception if a namespace is already defined.

        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.get_line_number(), line, position, namespace)

    def handle_namespace_url(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE NAMESPACE X AS URL "Y"``.

        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace_keyword = tokens['name']
        if namespace_keyword in NAMESPACE_BLACKLIST:
            raise ValueError('Upgrade usage to FamPlex')
        self.raise_for_redefined_namespace(line, position, namespace_keyword)

        url = tokens['url']
        # Optionally replace the declared URL with the canonical one for known keywords
        if self.upgrade_urls and namespace_keyword.lower() in keyword_to_url:
            url = keyword_to_url[namespace_keyword.lower()]

        self.namespace_url_dict[namespace_keyword] = url

        if self.skip_validation:
            return tokens

        namespace = self.manager.get_or_create_namespace(url)
        self.namespace_to_term_to_encoding[namespace_keyword] = namespace.get_term_to_encodings()

        return tokens

    def handle_namespace_pattern(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``.

        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)
        # Patterns are compiled immediately, unlike the legacy parser which stored raw strings
        self.namespace_to_pattern[namespace] = re.compile(tokens['value'])
        return tokens

    def raise_for_redefined_annotation(self, line: str, position: int, annotation: str) -> None:
        """Raise an exception if the given annotation is already defined.

        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.get_line_number(), line, position, annotation)

    def handle_annotations_url(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS URL "Y"``.

        :raises: RedefinedAnnotationError
        """
        keyword = tokens['name']
        self.raise_for_redefined_annotation(line, position, keyword)
        url = tokens['url']
        self.annotation_url_dict[keyword] = url

        if self.skip_validation:
            return tokens

        self.annotation_to_term[keyword] = self.manager.get_annotation_entry_names(url)

        return tokens

    def handle_annotation_list(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``.

        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_to_local[annotation] = set(tokens['values'])
        return tokens

    def handle_annotation_pattern(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``.

        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_to_pattern[annotation] = re.compile(tokens['value'])
        return tokens

    def has_enumerated_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined by an enumeration."""
        return annotation in self.annotation_to_term

    def has_regex_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined by a regular expression."""
        return annotation in self.annotation_to_pattern

    def has_local_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined locally."""
        return annotation in self.annotation_to_local

    def has_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined."""
        return (self.has_enumerated_annotation(annotation)
                or self.has_regex_annotation(annotation)
                or self.has_local_annotation(annotation))

    def has_enumerated_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined by an enumeration."""
        return namespace in self.namespace_to_term_to_encoding

    def has_regex_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined by a regular expression."""
        return namespace in self.namespace_to_pattern

    def has_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined."""
        return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace)

    def raise_for_version(self, line: str, position: int, version: str) -> None:
        """Check that a version string is valid for BEL documents.

        This means it's either in the YYYYMMDD or semantic version format.

        :param line: The line being parsed
        :param position: The position in the line being parsed
        :param str version: A version string
        :raises: VersionFormatWarning
        """
        if valid_date_version(version):
            return

        if not SEMANTIC_VERSION_STRING_RE.match(version):
            raise VersionFormatWarning(self.get_line_number(), line, position, version)
class BelParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces"""

    def __init__(self,
                 graph,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 allow_naked_names=False,
                 allow_nested=False,
                 allow_unqualified_translocations=False,
                 citation_clearing=True,
                 no_identifier_validation=False,
                 autostreamline=True,
                 required_annotations=None):
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_dict: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_dict: Optional[dict[str,dict[str,str]]]
        :param annotation_dict: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_dict: Optional[dict[str,set[str]]]
        :param namespace_regex: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_regex: Optional[dict[str,str]]
        :param annotation_regex: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_regex: Optional[dict[str,str]]
        :param bool allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param bool no_identifier_validation: If true, build the identifier parser without namespace/regex
         validation data
        :param bool autostreamline: Should the parser be streamlined on instantiation?
        :param Optional[list[str]] required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested

        self.control_parser = ControlParser(
            annotation_dict=annotation_dict,
            annotation_regex=annotation_regex,
            citation_clearing=citation_clearing,
            required_annotations=required_annotations,
        )

        if no_identifier_validation:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_dict=namespace_dict,
                namespace_regex=namespace_regex,
            )

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = ProteinModificationParser(self.identifier_parser).language

        #: `2.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_variant_var>`_
        self.variant = VariantParser().language

        #: `2.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_proteolytic_fragments>`_
        self.fragment = FragmentParser().language

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = LocationParser(self.identifier_parser).language
        opt_location = Optional(WCW + self.location)

        #: DEPRECATED: `2.2.X Amino Acid Substitutions <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_amino_acid_substitutions>`_
        self.psub = ProteinSubstitutionParser().language

        #: DEPRECATED: `2.2.X Sequence Variations <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_sequence_variations>`_
        self.gsub = GeneSubstitutionParser().language

        #: DEPRECATED
        #: `Truncated proteins <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_truncated_proteins>`_
        self.trunc = TruncationParser().language

        #: PyBEL BEL Specification variant
        self.gmod = GeneModificationParser().language  # FIXME add identifier parser to this

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = FusionParser(self.identifier_parser).language

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant | self.gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(build_legacy_fusion(identifier, 'c'))(FUSION)

        self.gene = gene_tag + nest(MatchFirst([
            self.gene_fusion,
            self.gene_fusion_legacy,
            self.gene_modified
        ]) + opt_location)
        """`2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_"""

        self.mirna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant))(VARIANTS)) + opt_location

        self.mirna = mirna_tag + nest(self.mirna_modified)
        """`2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_"""

        self.protein_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(MatchFirst([
                self.pmod, self.variant, self.fragment, self.psub, self.trunc
            ])))(VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(build_legacy_fusion(identifier, 'p'))(FUSION)

        self.protein = protein_tag + nest(MatchFirst([
            self.protein_fusion,
            self.protein_fusion_legacy,
            self.protein_modified,
        ]) + opt_location)
        """`2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_"""

        self.rna_modified = ungrouped_identifier + Optional(WCW + delimitedList(Group(self.variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(build_legacy_fusion(identifier, 'r'))(FUSION)

        self.rna = rna_tag + nest(MatchFirst([
            self.rna_fusion,
            self.rna_fusion_legacy,
            self.rna_modified,
        ]) + opt_location)
        """`2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_"""

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein, self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(language.activity_labels)).setParseAction(
            handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) + Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        self.activity = self.activity_standard | self.activity_legacy
        """`2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_"""

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW + Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)

        self.translocation_unqualified = nest(Group(self.simple_abundance)(TARGET))

        # Unqualified translocations raise unless explicitly allowed
        if not allow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(self.handle_translocation_illegal)

        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified,
            self.translocation_standard,
            self.translocation_legacy
        ])
        """`2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_"""

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion,
            self.cell_surface_expression,
            self.translocation,
            self.degradation,
            self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst([self.transformation, self.process, self.abundance]).streamline()

        # BEL Term to BEL Term Relationships

        #: `3.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xincreases>`_
        increases_tag = oneOf(['->', '→', 'increases']).setParseAction(replaceWith(INCREASES))

        #: `3.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdIncreases>`_
        directly_increases_tag = one_of_tags(['=>', '⇒', 'directlyIncreases'], DIRECTLY_INCREASES)

        #: `3.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xdecreases>`_
        decreases_tag = one_of_tags(['-|', 'decreases'], DECREASES)

        #: `3.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdDecreases>`_
        directly_decreases_tag = one_of_tags(['=|', 'directlyDecreases'], DIRECTLY_DECREASES)

        #: `3.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_analogous>`_
        analogous_tag = one_of_tags(['analogousTo'], ANALOGOUS_TO)

        #: `3.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xcnc>`_
        causes_no_change_tag = one_of_tags(['cnc', 'causesNoChange'], CAUSES_NO_CHANGE)

        #: `3.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_regulates_reg>`_
        regulates_tag = one_of_tags(['reg', 'regulates'], REGULATES)

        #: `3.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XnegCor>`_
        negative_correlation_tag = one_of_tags(['neg', 'negativeCorrelation'], NEGATIVE_CORRELATION)

        #: `3.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XposCor>`_
        positive_correlation_tag = one_of_tags(['pos', 'positiveCorrelation'], POSITIVE_CORRELATION)

        #: `3.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xassociation>`_
        association_tag = one_of_tags(['--', 'association'], ASSOCIATION)

        #: `3.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_orthologous>`_
        orthologous_tag = one_of_tags(['orthologous'], ORTHOLOGOUS)

        #: `3.4.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_isa>`_
        is_a_tag = Keyword(IS_A)

        #: PyBEL Variants
        equivalent_tag = one_of_tags(['eq', EQUIVALENT_TO], EQUIVALENT_TO)
        partof_tag = Keyword(PART_OF)

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term, MatchFirst(self.bel_to_bel_relations), self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        rate_limit_tag = oneOf(['rateLimitingStepOf']).setParseAction(replaceWith(RATE_LIMITING_STEP_OF))
        self.rate_limit = triple(
            MatchFirst([self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        subprocess_of_tag = oneOf(['subProcessOf']).setParseAction(replaceWith(SUBPROCESS_OF))
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        transcribed_tag = oneOf([':>', 'transcribedTo']).setParseAction(replaceWith(TRANSCRIBED_TO))
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        translated_tag = oneOf(['>>', 'translatedTo']).setParseAction(replaceWith(TRANSLATED_TO))
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        has_member_tag = oneOf(['hasMember']).setParseAction(replaceWith(HAS_MEMBER))
        self.has_member = triple(self.abundance, has_member_tag, self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(delimitedList(Group(self.abundance)))

        has_members_tag = oneOf(['hasMembers'])
        self.has_members = triple(self.abundance, has_members_tag, self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        has_components_tag = oneOf(['hasComponents'])
        self.has_components = triple(self.abundance, has_components_tag, self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        has_component_tag = oneOf(['hasComponent']).setParseAction(replaceWith(HAS_COMPONENT))
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag,
            self.abundance)

        #: `3.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biomarkerfor>`_
        biomarker_tag = oneOf(['biomarkerFor']).setParseAction(replaceWith(BIOMARKER_FOR))

        #: `3.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_prognosticbiomarkerfor>`_
        prognostic_biomarker_tag = oneOf(['prognosticBiomarkerFor']).setParseAction(
            replaceWith(PROGONSTIC_BIOMARKER_FOR))

        biomarker_tags = biomarker_tag | prognostic_biomarker_tag

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        has_variant_tags = oneOf(['hasVariant']).setParseAction(replaceWith(HAS_VARIANT))
        self.has_variant_relation = triple(self.abundance, has_variant_tags, self.abundance)

        has_reactant_tags = oneOf(['hasReactant']).setParseAction(replaceWith(HAS_REACTANT))
        has_product_tags = oneOf(['hasProduct']).setParseAction(replaceWith(HAS_PRODUCT))
        part_of_reaction_tags = has_reactant_tags | has_product_tags
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags, self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])
        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
            self.has_variant_relation,
            self.part_of_reaction
        ])
        self.unqualified_relation.setParseAction(self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag,
            decreases_tag,
            directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT), Suppress('labeled'), quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BelParser, self).__init__(self.language, streamline=autostreamline)

    @property
    def namespace_dict(self):
        """The dictionary of {namespace: {name: encoding}} stored in the internal identifier parser

        :rtype: dict[str,dict[str,str]]
        """
        return self.identifier_parser.namespace_dict

    @property
    def namespace_regex(self):
        """The dictionary of {namespace keyword: compiled regular expression} stored in the internal identifier parser

        :rtype: dict[str,re]
        """
        return self.identifier_parser.namespace_regex_compiled

    @property
    def annotation_dict(self):
        """A dictionary of annotations to their set of values

        :rtype: dict[str,set[str]]
        """
        return self.control_parser.annotation_dict

    @property
    def annotation_regex(self):
        """A dictionary of annotations defined by regular expressions {annotation keyword: string regular expression}

        :rtype: dict[str,str]
        """
        return self.control_parser.annotation_regex

    @property
    def allow_naked_names(self):
        """Should naked names be parsed, or should errors be thrown?

        :rtype: bool
        """
        return self.identifier_parser.allow_naked_names

    def get_annotations(self):
        """Get current annotations in this parser

        :rtype: dict
        """
        return self.control_parser.get_annotations()

    def clear(self):
        """Clears the graph and all control parser data (current citation, annotations, and statement group)"""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line, position, tokens):
        """Handles nested statements. If :code:`allow_nested` is False, raises a warning.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.line_number, line, position)

        # Flatten A rel (B rel C) into the two statements A rel B and B rel C
        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[SUBJECT],
            RELATION: tokens[RELATION],
            OBJECT: tokens[OBJECT][SUBJECT]
        })

        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[OBJECT][SUBJECT],
            RELATION: tokens[OBJECT][RELATION],
            OBJECT: tokens[OBJECT][OBJECT]
        })
        return tokens

    def check_function_semantics(self, line, position, tokens):
        """Raises an exception if the function used on the tokens is wrong

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidFunctionSemantic
        """
        # No validation data, or no namespace in the term — nothing to check
        if self.namespace_dict is None or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Regex-defined namespaces carry no encoding information to check against
        if namespace in self.namespace_regex:
            return tokens

        if self.allow_naked_names and tokens[NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(itt.chain.from_iterable(
            belns_encodings[k] for k in self.namespace_dict[namespace][name]))

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.line_number, line, position, tokens[FUNCTION], namespace, name,
                                          valid_functions)

        return tokens
    def handle_term(self, line, position, tokens):
        """Handles BEL terms (the subject and object of BEL relations)

        Ensures the term's node exists in the graph, then passes the tokens
        through unchanged.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens, relation):
        """Provides the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`

        Ensures the parent node (``tokens[0]``) and each child node (entries
        of ``tokens[2]``), then adds one unqualified edge of the given
        relation from the parent to every child.
        """
        parent_node_tuple, parent_node_attr = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_tuple, child_node_attr = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_tuple,
                                            child_node_tuple, relation)

        return tokens

    def handle_has_members(self, line, position, tokens):
        """Handles list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, line, position, tokens):
        """Handles list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier):
        """Adds a qualified edge from the internal aspects of the parser

        The evidence and citation come from the control parser's current
        state, and the originating line number is attached under ``LINE``.
        """
        self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            # copied so later changes to the control parser's citation do not
            # affect the stored edge data
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.line_number})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier):
        """Adds an edge, then adds the opposite direction edge if it should

        Relations listed in ``TWO_WAY_RELATIONS`` also get a mirrored
        ``v -> u`` edge with the subject and object modifiers swapped.
        """
        self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )
        if relation in TWO_WAY_RELATIONS:
            # symmetric relation: add the reverse edge, swapping modifiers
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

    def _handle_relation(self, tokens):
        """A policy in which all annotations are stored as sets, including single annotations

        Ensures both endpoint nodes, extracts the subject/object modifiers,
        normalizes the current annotations, and adds the qualified edge.

        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        object_node_tuple, _ = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        # normalize every annotation to a {value: True} mapping, whether the
        # control parser stored a single value or a set of values
        annotations = {
            annotation_name: ({ae: True for ae in annotation_entry}
                              if isinstance(annotation_entry, set) else {
                                  annotation_entry: True
                              })
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

        self._add_qualified_edge(
            subject_node_tuple,
            object_node_tuple,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line, position, tokens):
        """Handles BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!

        Validates that a citation, evidence, and all required annotations are
        currently set before delegating to :meth:`_handle_relation`.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MissingCitationException, MissingSupportWarning, MissingAnnotationWarning
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.line_number, line, position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.line_number, line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations(
        )
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.line_number, line, position,
                                           missing_required_annotations)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, line, position, tokens):
        """Handles unqualified relations

        Adds a single unqualified edge; no citation/evidence checks are made.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        object_node_tuple, _ = self.ensure_node(tokens[OBJECT])
        rel = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_tuple,
                                        object_node_tuple, rel)

    def handle_label_relation(self, line, position, tokens):
        """Handles statements like ``p(X) label "Label for X"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RelabelWarning if the subject node already has a description
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_tuple):
            raise RelabelWarning(
                line_number=self.line_number,
                line=line,
                position=position,
                # NOTE(review): this passes the graph's whole node collection,
                # not the node being relabeled — confirm this is intended
                node=self.graph.node,
                old_label=self.graph.get_node_description(subject_node_tuple),
                new_label=description)

        self.graph.set_node_description(subject_node_tuple, description)

    def ensure_node(self, tokens):
        """Turns parsed tokens into canonical node name and makes sure its in the graph

        Tokens carrying a ``MODIFIER`` wrap their real term under ``TARGET``;
        this recurses into the target before adding the node to the graph.

        :param pyparsing.ParseResult tokens: Tokens from PyParsing
        :return: A pair of the PyBEL node tuple and the PyBEL node data dictionary
        :rtype: tuple[tuple, dict]
        """
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node_attr_dict = po_to_dict(tokens)
        node_tuple = self.graph.add_node_from_data(node_attr_dict)
        return node_tuple, node_attr_dict

    def handle_translocation_illegal(self, line, position, tokens):
        """Raises on a malformed translocation statement.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MalformedTranslocationWarning
        """
        raise MalformedTranslocationWarning(self.line_number, line, position,
                                            tokens)
def create_grammar(container_ids, secret_ids):
    """Create the pyparsing grammar for the editfile.

    :param container_ids: collection of valid container ids; a numeric
        CONTAINER reference outside this collection fails to parse
    :param secret_ids: collection of valid secret ids; a ``#N`` reference
        outside this collection fails to parse
    :return: a pyparsing grammar whose parse result is a list of
        ``[container_id, entries]`` groups (``container_id`` is ``None`` for
        a file without CONTAINER headers)
    """
    # FIX: ParseException is raised in the parse actions below but was
    # missing from this local import list; unused names (alphas, Combine)
    # were dropped.
    from pyparsing import (nums, lineEnd, stringEnd, OneOrMore, ZeroOrMore,
                           SkipTo, Optional, And, Word, CharsNotIn, Empty,
                           QuotedString, Literal, Suppress, Group,
                           originalTextFor, Forward, ParserElement,
                           ParseException)

    # Read from bottom to top
    whiteSpaceChars = ' \t'
    # NOTE(review): this mutates pyparsing's process-wide default whitespace
    # characters; any grammar built later in the same process is affected.
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)

    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        # pyparsing bug #68.
        # https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        # reject numbers that are not known secret ids
        v = int(tokens[0])
        if v not in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v

    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        # reject numbers that are not known container ids
        v = int(tokens[0])
        if v not in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v

    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note)) -
             Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment

    # Instead of the following recursive grammar, we could have simply used
    #
    #   containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                       + Group(OneOrMore(line)))
    #   multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd
                                 | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (stringEnd
                                 | (containerLine | line) +
                                 _multipleContainers_body)
    multipleContainers = And([_multipleContainers_head])

    # container headers are rewritten to [None, id] markers so the parse
    # action below can tell them apart from entry groups
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        # Group the flat token stream into [container_id, entries] pairs;
        # the appended (None, None) sentinel flushes the final group.
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None), ):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret

    multipleContainers.setParseAction(multipleContainersParseAction)

    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])

    grammar = multipleContainers | oneContainer
    return grammar
def create_grammar(container_ids, secret_ids):
    """Create the pyparsing grammar for the editfile.

    NOTE(review): this is a behaviorally identical duplicate of the
    ``create_grammar`` defined earlier in this module; this second
    definition shadows the first. One of the two should be removed.

    :param container_ids: collection of valid container ids; a numeric
        CONTAINER reference outside this collection fails to parse
    :param secret_ids: collection of valid secret ids; a ``#N`` reference
        outside this collection fails to parse
    :return: a pyparsing grammar whose parse result is a list of
        ``[container_id, entries]`` groups (``container_id`` is ``None`` for
        a file without CONTAINER headers)
    """
    # FIX: ParseException is raised in the parse actions below but was
    # missing from this local import list; unused names (alphas, Combine)
    # were dropped.
    from pyparsing import (nums, lineEnd, stringEnd, OneOrMore, ZeroOrMore,
                           SkipTo, Optional, And, Word, CharsNotIn, Empty,
                           QuotedString, Literal, Suppress, Group,
                           originalTextFor, Forward, ParserElement,
                           ParseException)

    # Read from bottom to top
    whiteSpaceChars = ' \t'
    # NOTE(review): this mutates pyparsing's process-wide default whitespace
    # characters; any grammar built later in the same process is affected.
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)

    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        # pyparsing bug #68.
        # https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        # reject numbers that are not known secret ids
        v = int(tokens[0])
        if v not in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v

    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        # reject numbers that are not known container ids
        v = int(tokens[0])
        if v not in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v

    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note)) -
             Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment

    # Instead of the following recursive grammar, we could have simply used
    #
    #   containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                       + Group(OneOrMore(line)))
    #   multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd
                                 | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (stringEnd
                                 | (containerLine | line) +
                                 _multipleContainers_body)
    multipleContainers = And([_multipleContainers_head])

    # container headers are rewritten to [None, id] markers so the parse
    # action below can tell them apart from entry groups
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        # Group the flat token stream into [container_id, entries] pairs;
        # the appended (None, None) sentinel flushes the final group.
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None), ):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret

    multipleContainers.setParseAction(multipleContainersParseAction)

    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])

    grammar = multipleContainers | oneContainer
    return grammar