Code example #1
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""
    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values} for annotations defined locally in the document. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures.
        :param disallow_unqualified_translocations: If true, raise an error for translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param skip_validation: If true, build the control and identifier parsers without the namespace and annotation dictionaries
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph

        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

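        # ``identifier`` nests the parsed namespace/name under the IDENTIFIER key,
        # while ``ungrouped_identifier`` merges the same tokens into the enclosing result.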
        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(
            self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(
            self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))
            (VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(
            get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.identifier_parser.namespace_to_terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.identifier_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int,
                               tokens: ParseResults):
        """Handle nested statements.

        If :code:`allow_nested` is False, raises a ``NestedRelationWarning``.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            })

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            })
        return tokens

    def check_function_semantics(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and tokens[
                NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings.get(k, set())
                for k in self._namespace_dict[namespace][name]))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position,
                                namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.get_line_number(), line,
                                          position, tokens[FUNCTION],
                                          namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults,
                            relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_dsl, child_node_dsl,
                                            relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, _, __,
                              tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier) -> str:
        """Add a qualified edge from the internal aspects of the parser."""
        return self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.get_line_number()})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier) -> str:
        """Add an edge, then adds the opposite direction edge if it should."""
        sha512 = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

        return sha512

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

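        # Multi-valued annotations (sets) are expanded into {value: True} dicts;
        # single values become a one-entry dict.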
        annotations = {
            annotation_name: (
                {ae: True for ae in annotation_entry}
                if isinstance(annotation_entry, set)
                else {annotation_entry: True}
            )
            for annotation_name, annotation_entry in self.control_parser.annotations.items()
        }

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(
            self, line: str, position: int,
            tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line,
                                           position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.get_line_number(), line,
                                           position,
                                           missing_required_annotations)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, _, __,
                                    tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl,
                                        relation)
        return tokens

    def handle_label_relation(self, line: str, position: int,
                              tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) label "Label for X"``.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                node=subject_node_dsl,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into canonical node name and makes sure its in the graph."""
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int,
                                     tokens: ParseResults) -> None:
        """Handle a malformed translocation."""
        raise MalformedTranslocationWarning(self.get_line_number(), line,
                                            position, tokens)
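A minimal usage sketch for the BELParser constructor above, assuming the class is importable as pybel.parser.BELParser, that BaseParser exposes pyparsing's parseString, and that a catch-all pattern namespace is enough to satisfy both the identifier parser and check_function_semantics; the exact citation tuple format may vary between ControlParser versions.

import re

from pybel import BELGraph
from pybel.parser import BELParser  # import path assumed

graph = BELGraph()
parser = BELParser(
    graph,
    # check_function_semantics() skips encoding checks for pattern-defined
    # namespaces, so a catch-all regex keeps this sketch self-contained.
    namespace_to_pattern={'HGNC': re.compile(r'.+')},
)

# Citation and evidence must be set before any relation, otherwise
# _handle_relation_harness raises MissingCitationException / MissingSupportWarning.
parser.parseString('SET Citation = {"PubMed", "Example article", "12345"}')
parser.parseString('SET Evidence = "An example supporting sentence."')
parser.parseString('p(HGNC:AKT1) increases p(HGNC:EGFR)')
# graph now holds the two protein nodes and a qualified edge between them.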
Code example #2
File: parse_metadata.py  Project: bgyori/pybel
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso::

        BEL 1.0 Specification for the `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_ keyword
    """
    def __init__(self,
                 manager,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 default_namespace=None,
                 allow_redefinition=False):
        """
        :param pybel.manager.Manager manager: A cache manager
        :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
                                {namespace keyword: {name: encoding}}
        :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
                                {annotation keyword: set of valid values}
        :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
                                {namespace keyword: regex string}
        :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
                                {annotation keyword: regex string}
        :param set[str] default_namespace: A set of strings that can be used without a namespace
        :param bool allow_redefinition: If true, allow namespaces and annotations to be redefined
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager

        self.disallow_redefinition = not allow_redefinition

        #: A dictionary of cached {namespace keyword: {name: encoding}}
        self.namespace_dict = {} if namespace_dict is None else namespace_dict
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_dict = {} if annotation_dict is None else annotation_dict
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_regex = {} if namespace_regex is None else namespace_regex
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_regex = {} if annotation_regex is None else annotation_regex

        #: A set of namespace URLs that can't be cached
        self.uncachable_namespaces = set()

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {namespace keyword: OWL namespace URL}
        self.namespace_owl_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}
        #: A dictionary from {annotation keyword: OWL annotation URL}
        self.annotation_owl_dict = {}
        #: A set of annotation keywords that are defined ad-hoc in the BEL script
        self.annotation_lists = set()

        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value')
        ])

        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_owl = And([
            namespace_tag, owl_tag,
            Optional(function_tags('functions')),
            quote('url')
        ])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_owl = And([annotation_tag, owl_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_owl.setParseAction(self.handle_namespace_owl)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_owl.setParseAction(self.handle_annotation_owl)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        self.language = MatchFirst([
            self.document, self.namespace_url, self.namespace_owl,
            self.annotation_url, self.annotation_list, self.annotation_owl,
            self.annotation_pattern, self.namespace_pattern
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line, position, tokens):
        """Handles statements like ``SET DOCUMENT X = "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        key = tokens['key']
        value = tokens['value']

        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.line_number, line, position,
                                           key, value)

        norm_key = DOCUMENT_KEYS[key]

        if norm_key in self.document_metadata:
            log.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line, position, namespace):
        """Raises an exception if a namespace is already defined

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str namespace: The namespace being parsed
        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.line_number, line, position,
                                          namespace)

    def handle_namespace_url(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS URL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        url = tokens['url']

        namespace_result = self.manager.ensure_namespace(url)

        if isinstance(namespace_result, dict):
            self.namespace_dict[namespace] = namespace_result
            self.uncachable_namespaces.add(url)
        else:
            self.namespace_dict[namespace] = namespace_result.to_values()

        self.namespace_url_dict[namespace] = url

        return tokens

    def handle_namespace_owl(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS OWL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        functions = str(tokens['functions']
                        ) if 'functions' in tokens else BELNS_ENCODING_STR

        url = tokens['url']

        terms = self.manager.get_namespace_owl_terms(url, namespace)

        self.namespace_dict[namespace] = {term: functions for term in terms}
        self.namespace_owl_dict[namespace] = url

        return tokens

    def handle_namespace_pattern(self, line, position, tokens):
        """Handles statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)

        self.namespace_regex[namespace] = tokens['value']

        return tokens

    def raise_for_redefined_annotation(self, line, position, annotation):
        """Raises an exception if the given annotation is already defined

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str annotation: The annotation being parsed
        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.line_number, line, position,
                                           annotation)

    def handle_annotation_owl(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS OWL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        url = tokens['url']
        self.annotation_dict[
            annotation] = self.manager.get_annotation_owl_terms(
                url, annotation)
        self.annotation_owl_dict[annotation] = url

        return tokens

    def handle_annotations_url(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS URL "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        keyword = tokens['name']
        self.raise_for_redefined_annotation(line, position, keyword)

        url = tokens['url']
        self.annotation_dict[keyword] = self.manager.get_annotation_entries(
            url)
        self.annotation_url_dict[keyword] = url

        return tokens

    def handle_annotation_list(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)

        values = set(tokens['values'])

        self.annotation_dict[annotation] = values
        self.annotation_lists.add(annotation)

        return tokens

    def handle_annotation_pattern(self, line, position, tokens):
        """Handles statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_regex[annotation] = tokens['value']
        return tokens

    def has_enumerated_annotation(self, annotation):
        """Checks if this annotation is defined by an enumeration

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return annotation in self.annotation_dict

    def has_regex_annotation(self, annotation):
        """Checks if this annotation is defined by a regular expression

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return annotation in self.annotation_regex

    def has_annotation(self, annotation):
        """Checks if this annotation is defined

        :param str annotation: The keyword of an annotation
        :rtype: bool
        """
        return self.has_enumerated_annotation(
            annotation) or self.has_regex_annotation(annotation)

    def has_enumerated_namespace(self, namespace):
        """Checks if this namespace is defined by an enumeration

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_dict

    def has_regex_namespace(self, namespace):
        """Checks if this namespace is defined by a regular expression

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return namespace in self.namespace_regex

    def has_namespace(self, namespace):
        """Checks if this namespace is defined

        :param str namespace: The keyword of a namespace
        :rtype: bool
        """
        return self.has_enumerated_namespace(
            namespace) or self.has_regex_namespace(namespace)

    def raise_for_version(self, line, position, version):
        """Checks that a version string is valid for BEL documents, meaning it's either in the YYYYMMDD or semantic version
        format

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param str version: A version string
        :raises: VersionFormatWarning
        """
        if valid_date_version(version):
            return

        if not SEMANTIC_VERSION_STRING_RE.match(version):
            raise VersionFormatWarning(self.line_number, line, position,
                                       version)
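A minimal usage sketch for this older MetadataParser, again assuming BaseParser exposes pyparsing's parseString. Only handlers that never touch the cache manager are exercised, so a placeholder manager suffices here.

# SET DOCUMENT and LIST definitions never call the manager, so None is a
# sufficient placeholder for this sketch; URL/OWL definitions would need a real
# pybel.manager.Manager.
metadata_parser = MetadataParser(manager=None)

metadata_parser.parseString('SET DOCUMENT Name = "Example Document"')
metadata_parser.parseString('DEFINE ANNOTATION TextLocation AS LIST {"Abstract", "Results"}')

# handle_document stored the normalized key; handle_annotation_list filled the caches.
assert metadata_parser.document_metadata
assert metadata_parser.annotation_dict['TextLocation'] == {'Abstract', 'Results'}
assert 'TextLocation' in metadata_parser.annotation_lists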
Code example #3
File: parse_metadata.py  Project: shuvrobiswas/pybel
class MetadataParser(BaseParser):
    """A parser for the document and definitions section of a BEL document.

    .. seealso::

        BEL 1.0 Specification for the `DEFINE <http://openbel.org/language/web/version_1.0/bel_specification_version_1.0.html#_define>`_ keyword
    """
    def __init__(
        self,
        manager,
        namespace_to_term_to_encoding: Optional[
            NamespaceTermEncodingMapping] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        default_namespace: Optional[Set[str]] = None,
        allow_redefinition: bool = False,
        skip_validation: bool = False,
        upgrade_urls: bool = False,
    ) -> None:
        """Build a metadata parser.

        :param manager: A cache manager
        :param namespace_to_term_to_encoding:
          An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}}
        :param namespace_to_pattern:
          A regular expression namespace mapping from {namespace keyword: regex string}
        :param annotation_to_term: Enumerated annotation mapping from {annotation keyword: set of valid values}
        :param annotation_to_pattern: Regular expression annotation mapping from {annotation keyword: regex string}
        :param annotation_to_local: Locally defined annotation mapping from {annotation keyword: set of valid values}
        :param default_namespace: A set of strings that can be used without a namespace
        :param allow_redefinition: If true, allow namespaces and annotations to be redefined
        :param skip_validation: If true, don't download and cache namespaces/annotations
        :param upgrade_urls: If true, replace the URL for a recognized namespace keyword with its known canonical URL
        """
        #: This metadata parser's internal definition cache manager
        self.manager = manager
        self.disallow_redefinition = not allow_redefinition
        self.skip_validation = skip_validation
        self.upgrade_urls = upgrade_urls

        #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
        self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
        #: A set of namespace URLs that can't be cached
        self.uncachable_namespaces = set()
        #: A dictionary of {namespace keyword: regular expression string}
        self.namespace_to_pattern = namespace_to_pattern or {}
        #: A set of names that can be used without a namespace
        self.default_namespace = set(
            default_namespace) if default_namespace is not None else None

        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_term = annotation_to_term or {}
        #: A dictionary of {annotation keyword: regular expression string}
        self.annotation_to_pattern = annotation_to_pattern or {}
        #: A dictionary of cached {annotation keyword: set of values}
        self.annotation_to_local = annotation_to_local or {}

        #: A dictionary containing the document metadata
        self.document_metadata = {}

        #: A dictionary from {namespace keyword: BEL namespace URL}
        self.namespace_url_dict = {}
        #: A dictionary from {annotation keyword: BEL annotation URL}
        self.annotation_url_dict = {}

        self.document = And([
            set_tag,
            Suppress(BEL_KEYWORD_DOCUMENT),
            word('key'),
            Suppress('='),
            qid('value'),
        ])

        namespace_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_NAMESPACE),
            ppc.identifier('name'), as_tag
        ])
        self.namespace_url = And([namespace_tag, url_tag, quote('url')])
        self.namespace_pattern = And(
            [namespace_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        annotation_tag = And([
            define_tag,
            Suppress(BEL_KEYWORD_ANNOTATION),
            ppc.identifier('name'), as_tag
        ])
        self.annotation_url = And([annotation_tag, url_tag, quote('url')])
        self.annotation_list = And(
            [annotation_tag, list_tag,
             delimited_quoted_list('values')])
        self.annotation_pattern = And(
            [annotation_tag,
             Suppress(BEL_KEYWORD_PATTERN),
             quote('value')])

        self.document.setParseAction(self.handle_document)
        self.namespace_url.setParseAction(self.handle_namespace_url)
        self.namespace_pattern.setParseAction(self.handle_namespace_pattern)
        self.annotation_url.setParseAction(self.handle_annotations_url)
        self.annotation_list.setParseAction(self.handle_annotation_list)
        self.annotation_pattern.setParseAction(self.handle_annotation_pattern)

        self.language = MatchFirst([
            self.document,
            self.namespace_url,
            self.annotation_url,
            self.annotation_list,
            self.annotation_pattern,
            self.namespace_pattern,
        ]).setName('BEL Metadata')

        super(MetadataParser, self).__init__(self.language)

    def handle_document(self, line: str, position: int,
                        tokens: ParseResults) -> ParseResults:
        """Handle statements like ``SET DOCUMENT X = "Y"``.

        :raises: InvalidMetadataException
        :raises: VersionFormatWarning
        """
        key = tokens['key']
        value = tokens['value']

        if key not in DOCUMENT_KEYS:
            raise InvalidMetadataException(self.get_line_number(), line,
                                           position, key, value)

        norm_key = DOCUMENT_KEYS[key]

        if norm_key in self.document_metadata:
            logger.warning('Tried to overwrite metadata: %s', key)
            return tokens

        self.document_metadata[norm_key] = value

        if norm_key == METADATA_VERSION:
            self.raise_for_version(line, position, value)

        return tokens

    def raise_for_redefined_namespace(self, line: str, position: int,
                                      namespace: str) -> None:
        """Raise an exception if a namespace is already defined.

        :raises: RedefinedNamespaceError
        """
        if self.disallow_redefinition and self.has_namespace(namespace):
            raise RedefinedNamespaceError(self.get_line_number(), line,
                                          position, namespace)

    def handle_namespace_url(self, line: str, position: int,
                             tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE NAMESPACE X AS URL "Y"``.

        :raises: RedefinedNamespaceError
        :raises: pybel.resources.exc.ResourceError
        """
        namespace_keyword = tokens['name']

        if namespace_keyword in NAMESPACE_BLACKLIST:
            raise ValueError('Upgrade usage to FamPlex')

        self.raise_for_redefined_namespace(line, position, namespace_keyword)

        url = tokens['url']
        if self.upgrade_urls and namespace_keyword.lower() in keyword_to_url:
            url = keyword_to_url[namespace_keyword.lower()]

        self.namespace_url_dict[namespace_keyword] = url

        if self.skip_validation:
            return tokens

        namespace = self.manager.get_or_create_namespace(url)
        self.namespace_to_term_to_encoding[
            namespace_keyword] = namespace.get_term_to_encodings()

        return tokens

    def handle_namespace_pattern(self, line: str, position: int,
                                 tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``.

        :raises: RedefinedNamespaceError
        """
        namespace = tokens['name']
        self.raise_for_redefined_namespace(line, position, namespace)
        self.namespace_to_pattern[namespace] = re.compile(tokens['value'])
        return tokens

    def raise_for_redefined_annotation(self, line: str, position: int,
                                       annotation: str) -> None:
        """Raise an exception if the given annotation is already defined.

        :raises: RedefinedAnnotationError
        """
        if self.disallow_redefinition and self.has_annotation(annotation):
            raise RedefinedAnnotationError(self.get_line_number(), line,
                                           position, annotation)

    def handle_annotations_url(self, line: str, position: int,
                               tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS URL "Y"``.

        :raises: RedefinedAnnotationError
        """
        keyword = tokens['name']
        self.raise_for_redefined_annotation(line, position, keyword)

        url = tokens['url']
        self.annotation_url_dict[keyword] = url

        if self.skip_validation:
            return tokens

        self.annotation_to_term[
            keyword] = self.manager.get_annotation_entry_names(url)

        return tokens

    def handle_annotation_list(self, line: str, position: int,
                               tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``.

        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_to_local[annotation] = set(tokens['values'])
        return tokens

    def handle_annotation_pattern(self, line: str, position: int,
                                  tokens: ParseResults) -> ParseResults:
        """Handle statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``.

        :raises: RedefinedAnnotationError
        """
        annotation = tokens['name']
        self.raise_for_redefined_annotation(line, position, annotation)
        self.annotation_to_pattern[annotation] = re.compile(tokens['value'])
        return tokens

    def has_enumerated_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined by an enumeration."""
        return annotation in self.annotation_to_term

    def has_regex_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined by a regular expression."""
        return annotation in self.annotation_to_pattern

    def has_local_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined by an locally."""
        return annotation in self.annotation_to_local

    def has_annotation(self, annotation: str) -> bool:
        """Check if this annotation is defined."""
        return (self.has_enumerated_annotation(annotation)
                or self.has_regex_annotation(annotation)
                or self.has_local_annotation(annotation))

    def has_enumerated_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined by an enumeration."""
        return namespace in self.namespace_to_term_to_encoding

    def has_regex_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined by a regular expression."""
        return namespace in self.namespace_to_pattern

    def has_namespace(self, namespace: str) -> bool:
        """Check if this namespace is defined."""
        return self.has_enumerated_namespace(
            namespace) or self.has_regex_namespace(namespace)

    def raise_for_version(self, line: str, position: int,
                          version: str) -> None:
        """Check that a version string is valid for BEL documents.

        This means it's either in the YYYYMMDD or semantic version format.

        :param line: The line being parsed
        :param position: The position in the line being parsed
        :param version: A version string
        :raises: VersionFormatWarning
        """
        if valid_date_version(version):
            return

        if not SEMANTIC_VERSION_STRING_RE.match(version):
            raise VersionFormatWarning(self.get_line_number(), line, position,
                                       version)
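A minimal usage sketch for this newer MetadataParser with skip_validation=True, which short-circuits handle_namespace_url before manager.get_or_create_namespace() is called, so no manager round-trip is needed (parseString from BaseParser is assumed, and the URL below is a placeholder).

metadata_parser = MetadataParser(manager=None, skip_validation=True)

# The URL is only recorded, not downloaded or cached.
metadata_parser.parseString('DEFINE NAMESPACE HGNC AS URL "https://example.com/hgnc.belns"')
assert metadata_parser.namespace_url_dict['HGNC'] == 'https://example.com/hgnc.belns'

# PATTERN definitions are compiled; LIST definitions are stored locally.
metadata_parser.parseString('DEFINE ANNOTATION CellLine AS PATTERN "^CVCL_\\d+$"')
metadata_parser.parseString('DEFINE ANNOTATION Section AS LIST {"Abstract", "Methods"}')
assert metadata_parser.has_regex_annotation('CellLine')
assert metadata_parser.has_local_annotation('Section')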
Code example #4
class BelParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces"""
    def __init__(self,
                 graph,
                 namespace_dict=None,
                 annotation_dict=None,
                 namespace_regex=None,
                 annotation_regex=None,
                 allow_naked_names=False,
                 allow_nested=False,
                 allow_unqualified_translocations=False,
                 citation_clearing=True,
                 no_identifier_validation=False,
                 autostreamline=True,
                 required_annotations=None):
        """
        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_dict: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_dict: Optional[dict[str,dict[str,str]]]
        :param annotation_dict: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_dict: Optional[dict[str,set[str]]]
        :param namespace_regex: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_regex: Optional[dict[str,str]]
        :param annotation_regex: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_regex: Optional[dict[str,str]]
        :param bool allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_nested: If true, turn off nested statement failures.
        :param bool allow_unqualified_translocations: If true, allow translocations without TO and FROM clauses.
        :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param bool no_identifier_validation: If true, build the identifier parser without the namespace dictionaries
        :param bool autostreamline: Should the parser be streamlined on instantiation?
        :param Optional[list[str]] required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested

        self.control_parser = ControlParser(
            annotation_dict=annotation_dict,
            annotation_regex=annotation_regex,
            citation_clearing=citation_clearing,
            required_annotations=required_annotations,
        )

        if no_identifier_validation:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names, )
        else:
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_dict=namespace_dict,
                namespace_regex=namespace_regex,
            )

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = ProteinModificationParser(self.identifier_parser).language

        #: `2.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_variant_var>`_
        self.variant = VariantParser().language

        #: `2.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_proteolytic_fragments>`_
        self.fragment = FragmentParser().language

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = LocationParser(self.identifier_parser).language
        opt_location = Optional(WCW + self.location)

        #: DEPRECATED: `2.2.X Amino Acid Substitutions <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_amino_acid_substitutions>`_
        self.psub = ProteinSubstitutionParser().language

        #: DEPRECATED: `2.2.X Sequence Variations <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_sequence_variations>`_
        self.gsub = GeneSubstitutionParser().language

        #: DEPRECATED
        #: `Truncated proteins <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_truncated_proteins>`_
        self.trunc = TruncationParser().language

        #: PyBEL BEL Specification variant
        self.gmod = GeneModificationParser(
        ).language  # FIXME add identifier parser to this

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = FusionParser(self.identifier_parser).language

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(
            ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant | self.gsub | self.gmod))
            (VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(build_legacy_fusion(identifier,
                                                            'c'))(FUSION)

        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion, self.gene_fusion_legacy, self.gene_modified
            ]) + opt_location)
        """`2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_"""

        self.mirna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant))(VARIANTS)) + opt_location

        self.mirna = mirna_tag + nest(self.mirna_modified)
        """`2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_"""

        self.protein_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(
                Group(
                    MatchFirst([
                        self.pmod, self.variant, self.fragment, self.psub,
                        self.trunc
                    ])))(VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(build_legacy_fusion(
            identifier, 'p'))(FUSION)

        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)
        """`2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_"""

        self.rna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(self.variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(build_legacy_fusion(identifier,
                                                           'r'))(FUSION)

        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)
        """`2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_"""

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein,
            self.rna
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier +
                                                    opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance
                                | self.complex_singleton))(MEMBERS) +
            opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) +
            opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(
            language.activity_labels)).setParseAction(
                handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(
            ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(
            Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        self.activity = self.activity_standard | self.activity_legacy
        """`2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_"""

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(
            Group(self.simple_abundance)(TARGET))

        if not allow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(
                self.handle_translocation_illegal)

        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified, self.translocation_standard,
            self.translocation_legacy
        ])
        """`2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_"""

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(
            Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(
            delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(
            delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS),
            Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion, self.cell_surface_expression,
            self.translocation, self.degradation, self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst(
            [self.transformation, self.process, self.abundance]).streamline()

        # BEL Term to BEL Term Relationships

        #: `3.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xincreases>`_
        increases_tag = oneOf(['->', '→', 'increases'
                               ]).setParseAction(replaceWith(INCREASES))

        #: `3.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdIncreases>`_
        directly_increases_tag = one_of_tags(['=>', '⇒', 'directlyIncreases'],
                                             DIRECTLY_INCREASES)

        #: `3.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xdecreases>`_
        decreases_tag = one_of_tags(['-|', 'decreases'], DECREASES)

        #: `3.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdDecreases>`_
        directly_decreases_tag = one_of_tags(['=|', 'directlyDecreases'],
                                             DIRECTLY_DECREASES)

        #: `3.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_analogous>`_
        analogous_tag = one_of_tags(['analogousTo'], ANALOGOUS_TO)

        #: `3.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xcnc>`_
        causes_no_change_tag = one_of_tags(['cnc', 'causesNoChange'],
                                           CAUSES_NO_CHANGE)

        #: `3.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_regulates_reg>`_
        regulates_tag = one_of_tags(['reg', 'regulates'], REGULATES)

        #: `3.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XnegCor>`_
        negative_correlation_tag = one_of_tags(['neg', 'negativeCorrelation'],
                                               NEGATIVE_CORRELATION)

        #: `3.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XposCor>`_
        positive_correlation_tag = one_of_tags(['pos', 'positiveCorrelation'],
                                               POSITIVE_CORRELATION)

        #: `3.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xassociation>`_
        association_tag = one_of_tags(['--', 'association'], ASSOCIATION)

        #: `3.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_orthologous>`_
        orthologous_tag = one_of_tags(['orthologous'], ORTHOLOGOUS)

        #: `3.4.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_isa>`_
        is_a_tag = Keyword(IS_A)

        #: PyBEL Variants
        equivalent_tag = one_of_tags(['eq', EQUIVALENT_TO], EQUIVALENT_TO)
        partof_tag = Keyword(PART_OF)

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term,
                                 MatchFirst(self.bel_to_bel_relations),
                                 self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        rate_limit_tag = oneOf(['rateLimitingStepOf']).setParseAction(
            replaceWith(RATE_LIMITING_STEP_OF))
        self.rate_limit = triple(
            MatchFirst(
                [self.biological_process, self.activity, self.transformation]),
            rate_limit_tag, self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        subprocess_of_tag = oneOf(['subProcessOf']).setParseAction(
            replaceWith(SUBPROCESS_OF))
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag, self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        transcribed_tag = oneOf([':>', 'transcribedTo'
                                 ]).setParseAction(replaceWith(TRANSCRIBED_TO))
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        translated_tag = oneOf(['>>', 'translatedTo'
                                ]).setParseAction(replaceWith(TRANSLATED_TO))
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        has_member_tag = oneOf(['hasMember'
                                ]).setParseAction(replaceWith(HAS_MEMBER))
        self.has_member = triple(self.abundance, has_member_tag,
                                 self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(
            delimitedList(Group(self.abundance)))

        has_members_tag = oneOf(['hasMembers'])
        self.has_members = triple(self.abundance, has_members_tag,
                                  self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        has_components_tag = oneOf(['hasComponents'])
        self.has_components = triple(self.abundance, has_components_tag,
                                     self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        has_component_tag = oneOf(['hasComponent']).setParseAction(
            replaceWith(HAS_COMPONENT))
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag, self.abundance)

        #: `3.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biomarkerfor>`_
        biomarker_tag = oneOf(['biomarkerFor'
                               ]).setParseAction(replaceWith(BIOMARKER_FOR))

        #: `3.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_prognosticbiomarkerfor>`_
        prognostic_biomarker_tag = oneOf([
            'prognosticBiomarkerFor'
        ]).setParseAction(replaceWith(PROGONSTIC_BIOMARKER_FOR))

        biomarker_tags = biomarker_tag | prognostic_biomarker_tag

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        has_variant_tags = oneOf(['hasVariant'
                                  ]).setParseAction(replaceWith(HAS_VARIANT))
        self.has_variant_relation = triple(self.abundance, has_variant_tags,
                                           self.abundance)

        has_reactant_tags = oneOf(['hasReactant'
                                   ]).setParseAction(replaceWith(HAS_REACTANT))
        has_product_tags = oneOf(['hasProduct'
                                  ]).setParseAction(replaceWith(HAS_PRODUCT))
        part_of_reaction_tags = has_reactant_tags | has_product_tags
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags,
                                       self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member, self.has_component, self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(
            self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag, decreases_tag, directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term, causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))

        self.nested_causal_relationship.setParseAction(
            self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT)
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(
            self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BelParser, self).__init__(self.language,
                                        streamline=autostreamline)

    @property
    def namespace_dict(self):
        """The dictionary of {namespace: {name: encoding}} stored in the internal identifier parser

        :rtype: dict[str,dict[str,str]]
        """
        return self.identifier_parser.namespace_dict

    @property
    def namespace_regex(self):
        """The dictionary of {namespace keyword: compiled regular expression} stored the internal identifier parser

        :rtype: dict[str,re]
        """
        return self.identifier_parser.namespace_regex_compiled

    @property
    def annotation_dict(self):
        """A dictionary of annotations to their set of values

        :rtype: dict[str,set[str]]
        """
        return self.control_parser.annotation_dict

    @property
    def annotation_regex(self):
        """A dictionary of annotations defined by regular expressions {annotation keyword: string regular expression}

        :rtype: dict[str,str]
        """
        return self.control_parser.annotation_regex

    @property
    def allow_naked_names(self):
        """Should naked names be parsed, or should errors be thrown?

        :rtype: bool
        """
        return self.identifier_parser.allow_naked_names

    def get_annotations(self):
        """Get current annotations in this parser

        :rtype: dict
        """
        return self.control_parser.get_annotations()

    def clear(self):
        """Clears the graph and all control parser data (current citation, annotations, and statement group)"""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line, position, tokens):
        """Handles nested statements. If :code:`allow_nested` is False, raises a warning.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.line_number, line, position)

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT]
            })

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT]
            })
        return tokens

    def check_function_semantics(self, line, position, tokens):
        """Raises an exception if the function used on the tokens is wrong

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidFunctionSemantic
        """
        if self.namespace_dict is None or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        if namespace in self.namespace_regex:
            return tokens

        if self.allow_naked_names and tokens[
                NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings[k]
                for k in self.namespace_dict[namespace][name]))

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.line_number, line, position,
                                          tokens[FUNCTION], namespace, name,
                                          valid_functions)

        return tokens

    def handle_term(self, line, position, tokens):
        """Handles BEL terms (the subject and object of BEL relations)

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens, relation):
        """Provides the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`"""
        parent_node_tuple, parent_node_attr = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_tuple, child_node_attr = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_tuple,
                                            child_node_tuple, relation)

        return tokens

    def handle_has_members(self, line, position, tokens):
        """Handles list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, line, position, tokens):
        """Handles list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier):
        """Adds a qualified edge from the internal aspects of the parser"""
        self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.line_number})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier):
        """Adds an edge, then adds the opposite direction edge if it should"""
        self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

    def _handle_relation(self, tokens):
        """A policy in which all annotations are stored as sets, including single annotations

        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        object_node_tuple, _ = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        annotations = {
            annotation_name: ({ae: True
                               for ae in annotation_entry} if isinstance(
                                   annotation_entry, set) else {
                                       annotation_entry: True
                                   })
            for annotation_name, annotation_entry in
            self.control_parser.annotations.items()
        }

        self._add_qualified_edge(
            subject_node_tuple,
            object_node_tuple,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line, position, tokens):
        """Handles BEL relations based on the policy specified on instantiation. Note: this can't be changed after
        instantiation!

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.line_number, line, position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.line_number, line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations(
        )
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.line_number, line, position,
                                           missing_required_annotations)

        self._handle_relation(tokens)

        return tokens

    def handle_unqualified_relation(self, line, position, tokens):
        """Handles unqualified relations

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        object_node_tuple, _ = self.ensure_node(tokens[OBJECT])
        rel = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_tuple, object_node_tuple,
                                        rel)

    def handle_label_relation(self, line, position, tokens):
        """Handles statements like ``p(X) label "Label for X"``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RelabelWarning
        """
        subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_tuple):
            raise RelabelWarning(
                line_number=self.line_number,
                line=line,
                position=position,
                node=self.graph.node,
                old_label=self.graph.get_node_description(subject_node_tuple),
                new_label=description)

        self.graph.set_node_description(subject_node_tuple, description)

    def ensure_node(self, tokens):
        """Turns parsed tokens into canonical node name and makes sure its in the graph

        :param pyparsing.ParseResult tokens: Tokens from PyParsing
        :return: A pair of the PyBEL node tuple and the PyBEL node data dictionary
        :rtype: tuple[tuple, dict]
        """
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node_attr_dict = po_to_dict(tokens)
        node_tuple = self.graph.add_node_from_data(node_attr_dict)

        return node_tuple, node_attr_dict

    def handle_translocation_illegal(self, line, position, tokens):
        """Raise a warning when an unqualified translocation is encountered while they are disallowed"""
        raise MalformedTranslocationWarning(self.line_number, line, position,
                                            tokens)
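A minimal usage sketch for the class above (not from the original source). It assumes pybel.BELGraph is importable and that identifiers are accepted without validation when no namespace_dict is supplied; the BEL term is only an example.

from pybel import BELGraph

graph = BELGraph()
parser = BelParser(graph)

# The assembled grammar is a pyparsing element, so it can be driven directly.
# Parsing a single BEL term should add the corresponding node to the graph.
parser.language.parseString('p(HGNC:AKT1)')
print(graph.number_of_nodes())  # expected: 1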
Code example #5
0
def create_grammar(container_ids, secret_ids):
    """ Create the grammar for the editfile """
    from pyparsing import (nums, alphas, lineEnd, stringEnd, OneOrMore,
                           ZeroOrMore, SkipTo, Optional, And, Word, CharsNotIn,
                           Empty, QuotedString, Literal, Suppress, Group,
                           Combine, originalTextFor, Forward, ParserElement,
                           ParseException)  # ParseException is raised by the id checks below
    # Read from bottom to top
    whiteSpaceChars = ' \t'
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)
    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        #      pyparsing bug #68.
        #       https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        v = int(tokens[0])
        if v not in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v

    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        v = int(tokens[0])
        if v not in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v

    containerId = Word(nums).setParseAction(containerIdParseAction)
    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note)) -
             Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment
    # Instead of the following recursive grammar, we could have simply used
    #
    #     containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                                                + Group(OneOrMore(line)))
    #     multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (
        stringEnd
        | (containerLine | line) + _multipleContainers_body)
    _multipleContainers_entry = And([entry])
    multipleContainers = And([_multipleContainers_head])  # TODO ibidem below
    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None), ):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret

    multipleContainers.setParseAction(multipleContainersParseAction)
    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])
    grammar = multipleContainers | oneContainer
    return grammar
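A hypothetical invocation of the grammar above; the container/secret ids and the editfile text are made up for illustration. Each CONTAINER line opens a block, and the parse action regroups the entries that follow under that container id.

container_ids = {1, 2}
secret_ids = {10}
grammar = create_grammar(container_ids, secret_ids)

editfile = (
    'CONTAINER 1\n'
    'mail "hunter2" "my mail password"\n'
    'bank #10\n'
)

# Expected shape (illustrative): one [container_id, entries] pair per block.
for container_id, entries in grammar.parseString(editfile):
    print(container_id, len(entries))  # e.g. "1 2"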