コード例 #1
0
 def test_parts(self):
     """Check the size of each section produced by splitting the ``simple`` script.

     The split yields three sections, in order: document lines, definition lines,
     and statement lines. Each section is materialized into a list and its length
     is compared against the expected count.
     """
     sections = split_file_to_annotations_and_definitions(simple.splitlines())
     docs, definitions, statements = sections
     # Pair each expected count with its section; checked in document/definition/statement order.
     expectations = (
         (8, docs),
         (4, definitions),
         (14, statements),
     )
     for expected_count, section in expectations:
         self.assertEqual(expected_count, len(list(section)))
コード例 #2
0
ファイル: line_utils.py プロジェクト: plin1112/pybel
def parse_lines(
    graph: BELGraph,
    lines: Iterable[str],
    manager: Optional[Manager] = None,
    disallow_nested: bool = False,
    citation_clearing: bool = True,
    use_tqdm: bool = False,
    tqdm_kwargs: Optional[Mapping[str, Any]] = None,
    no_identifier_validation: bool = False,
    disallow_unqualified_translocations: bool = False,
    allow_redefinition: bool = False,
    allow_definition_failures: bool = False,
    allow_naked_names: bool = False,
    required_annotations: Optional[List[str]] = None,
    upgrade_urls: bool = False,
) -> None:
    """Parse an iterable of BEL script lines and load the result into ``graph``.

    The lines are first split into document, definition, and statement sections.
    Each section is then handed to :func:`parse_document`, :func:`parse_definitions`,
    and :func:`parse_statements` respectively, in that order. The final node and
    edge counts of the graph are logged at INFO level.

    :param graph: A BEL graph to populate
    :param lines: An iterable over lines of BEL script
    :param manager: A PyBEL database manager. If none is given, a new :class:`Manager` is created.
    :param disallow_nested: If true, turns on nested statement failures
    :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
                              Passed through to the BEL parser.
    :param use_tqdm: Use :mod:`tqdm` to show a progress bar? Applies to both definition and statement parsing.
    :param tqdm_kwargs: Keywords to pass to ``tqdm``
    :param disallow_unqualified_translocations: Passed through to the BEL parser. Judging by the name, a true
        value rejects translocations without TO and FROM clauses -- confirm against :class:`BELParser`.
    :param required_annotations: Annotations that are required for all statements
    :param upgrade_urls: Automatically upgrade old namespace URLs. Defaults to false.

    .. warning::

        The following options make concessions for BEL that is either **WRONG** or **UNSCIENTIFIC**.
        Using them puts the reproducibility and validity of your results at risk.

    :param no_identifier_validation: If true, turns off namespace validation
    :param allow_naked_names: If true, turns off naked namespace failures
    :param allow_redefinition: If true, doesn't fail on second definition of same name or annotation
    :param allow_definition_failures: If true, allows parsing to continue if a terminology file download/parse fails
    """
    document_lines, definition_lines, statement_lines = split_file_to_annotations_and_definitions(lines)

    manager = Manager() if manager is None else manager

    # Progress-bar settings shared by the definition and statement passes.
    progress_options = {
        'use_tqdm': use_tqdm,
        'tqdm_kwargs': tqdm_kwargs,
    }

    metadata_parser = MetadataParser(
        manager,
        allow_redefinition=allow_redefinition,
        skip_validation=no_identifier_validation,
        upgrade_urls=upgrade_urls,
    )

    parse_document(graph, document_lines, metadata_parser)

    parse_definitions(
        graph,
        definition_lines,
        metadata_parser,
        allow_failures=allow_definition_failures,
        **progress_options,
    )

    # The terminology lookups are read from the metadata parser only after the
    # definitions have been parsed, matching the original evaluation order.
    terminologies = {
        'namespace_to_term_to_encoding': metadata_parser.namespace_to_term_to_encoding,
        'namespace_to_pattern': metadata_parser.namespace_to_pattern,
        'annotation_to_term': metadata_parser.annotation_to_term,
        'annotation_to_pattern': metadata_parser.annotation_to_pattern,
        'annotation_to_local': metadata_parser.annotation_to_local,
    }
    language_settings = {
        'disallow_nested': disallow_nested,
        'citation_clearing': citation_clearing,
        'skip_validation': no_identifier_validation,
        'allow_naked_names': allow_naked_names,
        'disallow_unqualified_translocations': disallow_unqualified_translocations,
        'required_annotations': required_annotations,
    }
    bel_parser = BELParser(graph=graph, **terminologies, **language_settings)

    parse_statements(graph, statement_lines, bel_parser, **progress_options)

    logger.info('Network has %d nodes and %d edges', graph.number_of_nodes(), graph.number_of_edges())