def __init__(
    self,
    annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
    annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
    citation_clearing: bool = True,
    required_annotations: Optional[List[str]] = None,
) -> None:
    """Initialize the control statement parser.

    Builds the grammar for ``SET``/``UNSET`` control statements, binds this
    instance's ``handle_*`` methods to it as parse actions, and resets the
    tracked parser state (statement group, citation, evidence, annotations).

    :param annotation_to_term: A dictionary of {annotation: set of valid values} defined with URL for parsing
    :param annotation_to_pattern: A dictionary of {annotation: regular expression pattern}
    :param annotation_to_local: A dictionary of {annotation: set of valid values} for parsing defined with LIST
    :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
    :param required_annotations: Annotations that are required
    """
    self.citation_clearing = citation_clearing

    self.annotation_to_term = annotation_to_term or {}
    self.annotation_to_pattern = annotation_to_pattern or {}
    self.annotation_to_local = annotation_to_local or {}

    # Mutable parse state, all initially unset.
    self.statement_group = None
    self.citation_db = None
    self.citation_db_id = None
    self.evidence = None
    self.annotations = {}
    self.required_annotations = required_annotations or []

    annotation_key = ppc.identifier('key').setParseAction(self.handle_annotation_key)

    # The stubs are shared objects defined outside this method. Calling an
    # element with no arguments returns a copy, so the parse action bound to
    # *this* instance never replaces the action of another parser instance.
    self.set_statement_group = set_statement_group_stub().setParseAction(self.handle_set_statement_group)
    self.set_citation = set_citation_stub().setParseAction(self.handle_set_citation)
    self.set_evidence = set_evidence_stub().setParseAction(self.handle_set_evidence)

    set_command_prefix = And([annotation_key('key'), Suppress('=')])

    self.set_command = set_command_prefix + qid('value')
    self.set_command.setParseAction(self.handle_set_command)

    self.set_command_list = set_command_prefix + delimited_quoted_list('values')
    self.set_command_list.setParseAction(self.handle_set_command_list)

    # addParseAction (not setParseAction) keeps handle_annotation_key, which
    # the copy of annotation_key already carries, and appends the unset handler.
    self.unset_command = annotation_key('key')
    self.unset_command.addParseAction(self.handle_unset_command)

    self.unset_evidence = supporting_text_tags(EVIDENCE)
    self.unset_evidence.setParseAction(self.handle_unset_evidence)

    self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
    self.unset_citation.setParseAction(self.handle_unset_citation)

    self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
    self.unset_statement_group.setParseAction(self.handle_unset_statement_group)

    self.unset_list = delimited_unquoted_list('values')
    self.unset_list.setParseAction(self.handle_unset_list)

    # Copy the shared element for the same reason as the stubs above.
    self.unset_all = unset_all().setParseAction(self.handle_unset_all)

    self.set_statements = set_tag + MatchFirst([
        self.set_statement_group,
        self.set_citation,
        self.set_evidence,
        self.set_command,
        self.set_command_list,
    ])

    self.unset_statements = unset_tag + MatchFirst([
        self.unset_all,
        self.unset_citation,
        self.unset_evidence,
        self.unset_statement_group,
        self.unset_command,
        self.unset_list,
    ])

    self.language = self.set_statements | self.unset_statements

    super(ControlParser, self).__init__(self.language)
def __init__(self, manager, namespace_dict=None, annotation_dict=None, namespace_regex=None, annotation_regex=None,
             default_namespace=None, allow_redefinition=False):
    """Build a metadata parser and the grammar for BEL document metadata lines.

    :param pybel.manager.Manager manager: A cache manager
    :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from
        {namespace keyword: {name: encoding}}
    :param dict[str,set[str]] annotation_dict: A dictionary of pre-loaded, enumerated annotations from
        {annotation keyword: set of valid values}
    :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from
        {namespace keyword: regex string}
    :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from
        {annotation keyword: regex string}
    :param set[str] default_namespace: A set of strings that can be used without a namespace
    :param bool allow_redefinition: Stored negated as ``disallow_redefinition``
    """
    #: This metadata parser's internal definition cache manager
    self.manager = manager
    self.disallow_redefinition = not allow_redefinition

    #: A dictionary of cached {namespace keyword: {name: encoding}}
    self.namespace_dict = namespace_dict if namespace_dict is not None else {}
    #: A dictionary of cached {annotation keyword: set of values}
    self.annotation_dict = annotation_dict if annotation_dict is not None else {}
    #: A dictionary of {namespace keyword: regular expression string}
    self.namespace_regex = namespace_regex if namespace_regex is not None else {}
    #: A set of names that can be used without a namespace (None when not given)
    if default_namespace is None:
        self.default_namespace = None
    else:
        self.default_namespace = set(default_namespace)
    #: A dictionary of {annotation keyword: regular expression string}
    self.annotation_regex = annotation_regex if annotation_regex is not None else {}

    #: A set of namespaces's URLs that can't be cached
    self.uncachable_namespaces = set()
    #: A dictionary containing the document metadata
    self.document_metadata = {}
    #: A dictionary from {namespace keyword: BEL namespace URL}
    self.namespace_url_dict = {}
    #: A dictionary from {namespace keyword: OWL namespace URL}
    self.namespace_owl_dict = {}
    #: A dictionary from {annotation keyword: BEL annotation URL}
    self.annotation_url_dict = {}
    #: A dictionary from {annotation keyword: OWL annotation URL}
    self.annotation_owl_dict = {}
    #: A set of annotation keywords that are defined ad-hoc in the BEL script
    self.annotation_lists = set()

    # Document metadata line: <set tag> <document keyword> key = value
    self.document = And([
        set_tag,
        Suppress(BEL_KEYWORD_DOCUMENT),
        word('key'),
        Suppress('='),
        qid('value'),
    ])

    # Shared prefix of every namespace definition.
    define_namespace = And([
        define_tag,
        Suppress(BEL_KEYWORD_NAMESPACE),
        ppc.identifier('name'),
        as_tag,
    ])
    self.namespace_url = And([define_namespace, url_tag, quote('url')])
    self.namespace_owl = And([
        define_namespace,
        owl_tag,
        Optional(function_tags('functions')),
        quote('url'),
    ])
    self.namespace_pattern = And([define_namespace, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    # Shared prefix of every annotation definition.
    define_annotation = And([
        define_tag,
        Suppress(BEL_KEYWORD_ANNOTATION),
        ppc.identifier('name'),
        as_tag,
    ])
    self.annotation_url = And([define_annotation, url_tag, quote('url')])
    self.annotation_owl = And([define_annotation, owl_tag, quote('url')])
    self.annotation_list = And([define_annotation, list_tag, delimited_quoted_list('values')])
    self.annotation_pattern = And([define_annotation, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    # Attach the handler method for each kind of metadata line.
    bindings = (
        (self.document, self.handle_document),
        (self.namespace_url, self.handle_namespace_url),
        (self.namespace_owl, self.handle_namespace_owl),
        (self.namespace_pattern, self.handle_namespace_pattern),
        (self.annotation_url, self.handle_annotations_url),
        (self.annotation_owl, self.handle_annotation_owl),
        (self.annotation_list, self.handle_annotation_list),
        (self.annotation_pattern, self.handle_annotation_pattern),
    )
    for expression, handler in bindings:
        expression.setParseAction(handler)

    alternatives = [
        self.document,
        self.namespace_url,
        self.namespace_owl,
        self.annotation_url,
        self.annotation_list,
        self.annotation_owl,
        self.annotation_pattern,
        self.namespace_pattern,
    ]
    self.language = MatchFirst(alternatives)
    self.language.setName('BEL Metadata')

    super(MetadataParser, self).__init__(self.language)
# -*- coding: utf-8 -*- """Parse the TSV template format with PyParsing then make a new parser.""" from pyparsing import Group, MatchFirst, Optional, Suppress, delimitedList, nestedExpr, pyparsing_common as ppc from table_validator import parse_tsv keyword = ppc.identifier + Suppress('=') + ppc.identifier te_keywords = nestedExpr(content=delimitedList(keyword)) te_content = ppc.identifier + Optional(te_keywords) template_command = nestedExpr(opener='{', closer='}', content=te_content) cell = MatchFirst([ Group(template_command)('command'), Group(template_command)('command') + ppc.identifier('text'), ppc.identifier('text'), ]) if __name__ == '__main__': with open('../../tests/repeat_template.tsv') as file: t = [ [ cell.parseString(col) for col in row ] for row in parse_tsv(file) ] for i, row in enumerate(t): for j, col in enumerate(row):
def __init__(
    self,
    manager,
    namespace_to_term_to_encoding: Optional[NamespaceTermEncodingMapping] = None,
    namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
    annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
    annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
    default_namespace: Optional[Set[str]] = None,
    allow_redefinition: bool = False,
    skip_validation: bool = False,
    upgrade_urls: bool = False,
) -> None:
    """Build a metadata parser.

    :param manager: A cache manager
    :param namespace_to_term_to_encoding:
      An enumerated namespace mapping from {namespace keyword: {(identifier, name): encoding}}
    :param namespace_to_pattern:
      A regular expression namespace mapping from {namespace keyword: regex string}
    :param annotation_to_term:
      Enumerated annotation mapping from {annotation keyword: set of valid values}
    :param annotation_to_pattern:
      Regular expression annotation mapping from {annotation keyword: regex string}
    :param annotation_to_local:
      Annotation mapping from {annotation keyword: set of values}, stored as ``annotation_to_local``
    :param default_namespace: A set of strings that can be used without a namespace
    :param allow_redefinition: Stored negated as ``disallow_redefinition``
    :param skip_validation: If true, don't download and cache namespaces/annotations
    :param upgrade_urls: Stored as ``upgrade_urls``; how it is used is not visible in this method
    """
    #: This metadata parser's internal definition cache manager
    self.manager = manager
    self.disallow_redefinition = not allow_redefinition
    self.skip_validation = skip_validation
    self.upgrade_urls = upgrade_urls

    #: A dictionary of cached {namespace keyword: {(identifier, name): encoding}}
    self.namespace_to_term_to_encoding = namespace_to_term_to_encoding or {}
    #: A set of namespaces's URLs that can't be cached
    self.uncachable_namespaces = set()
    #: A dictionary of {namespace keyword: regular expression string}
    self.namespace_to_pattern = namespace_to_pattern or {}

    #: A set of names that can be used without a namespace (None when not given)
    if default_namespace is None:
        self.default_namespace = None
    else:
        self.default_namespace = set(default_namespace)

    #: A dictionary of cached {annotation keyword: set of values}
    self.annotation_to_term = annotation_to_term or {}
    #: A dictionary of {annotation keyword: regular expression string}
    self.annotation_to_pattern = annotation_to_pattern or {}
    #: A dictionary of cached {annotation keyword: set of values}
    self.annotation_to_local = annotation_to_local or {}

    #: A dictionary containing the document metadata
    self.document_metadata = {}
    #: A dictionary from {namespace keyword: BEL namespace URL}
    self.namespace_url_dict = {}
    #: A dictionary from {annotation keyword: BEL annotation URL}
    self.annotation_url_dict = {}

    # Document metadata line: <set tag> <document keyword> key = value
    self.document = And([
        set_tag,
        Suppress(BEL_KEYWORD_DOCUMENT),
        word('key'),
        Suppress('='),
        qid('value'),
    ])

    # Shared prefix of every namespace definition.
    define_namespace = And([
        define_tag,
        Suppress(BEL_KEYWORD_NAMESPACE),
        ppc.identifier('name'),
        as_tag,
    ])
    self.namespace_url = And([define_namespace, url_tag, quote('url')])
    self.namespace_pattern = And([define_namespace, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    # Shared prefix of every annotation definition.
    define_annotation = And([
        define_tag,
        Suppress(BEL_KEYWORD_ANNOTATION),
        ppc.identifier('name'),
        as_tag,
    ])
    self.annotation_url = And([define_annotation, url_tag, quote('url')])
    self.annotation_list = And([define_annotation, list_tag, delimited_quoted_list('values')])
    self.annotation_pattern = And([define_annotation, Suppress(BEL_KEYWORD_PATTERN), quote('value')])

    # Attach the handler method for each kind of metadata line.
    bindings = (
        (self.document, self.handle_document),
        (self.namespace_url, self.handle_namespace_url),
        (self.namespace_pattern, self.handle_namespace_pattern),
        (self.annotation_url, self.handle_annotations_url),
        (self.annotation_list, self.handle_annotation_list),
        (self.annotation_pattern, self.handle_annotation_pattern),
    )
    for expression, handler in bindings:
        expression.setParseAction(handler)

    alternatives = [
        self.document,
        self.namespace_url,
        self.annotation_url,
        self.annotation_list,
        self.annotation_pattern,
        self.namespace_pattern,
    ]
    self.language = MatchFirst(alternatives)
    self.language.setName('BEL Metadata')

    super(MetadataParser, self).__init__(self.language)
def __init__(self, annotation_dict=None, annotation_regex=None, citation_clearing=True, required_annotations=None):
    """Initialize the control statement parser.

    Builds the grammar for ``SET``/``UNSET`` control statements, binds this
    instance's ``handle_*`` methods to it as parse actions, and resets the
    tracked parser state (statement group, citation, evidence, annotations).

    :param annotation_dict: A dictionary of {annotation: set of valid values} for parsing
    :type annotation_dict: Optional[dict[str,set[str]]]
    :param annotation_regex: A dictionary of {annotation: regular expression string}
    :type annotation_regex: Optional[dict[str,str]]
    :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
    :param Optional[list[str]] required_annotations: Annotations that are required
    """
    self.citation_clearing = citation_clearing

    self._annotation_dict = {} if annotation_dict is None else annotation_dict
    self._annotation_regex = {} if annotation_regex is None else annotation_regex
    # Compile from the attribute assigned just above, so construction does
    # not rely on an accessor defined outside this method.
    self._annotation_regex_compiled = {
        keyword: re.compile(value)
        for keyword, value in self._annotation_regex.items()
    }

    # Mutable parse state, all initially unset/empty.
    self.statement_group = None
    self.citation = {}
    self.evidence = None
    self.annotations = {}
    self.required_annotations = required_annotations or []

    annotation_key = ppc.identifier('key').setParseAction(self.handle_annotation_key)

    # Calling a shared element with no arguments returns a copy, so the parse
    # action bound to *this* instance never replaces another instance's action.
    self.set_statement_group = set_statement_group_stub().setParseAction(self.handle_set_statement_group)
    self.set_citation = set_citation_stub().setParseAction(self.handle_set_citation)
    self.set_evidence = set_evidence_stub().setParseAction(self.handle_set_evidence)

    set_command_prefix = And([annotation_key('key'), Suppress('=')])

    self.set_command = set_command_prefix + qid('value')
    self.set_command.setParseAction(self.handle_set_command)

    self.set_command_list = set_command_prefix + delimited_quoted_list('values')
    self.set_command_list.setParseAction(self.handle_set_command_list)

    # addParseAction (not setParseAction) keeps handle_annotation_key, which
    # the copy of annotation_key already carries, and appends the unset handler.
    self.unset_command = annotation_key('key')
    self.unset_command.addParseAction(self.handle_unset_command)

    self.unset_evidence = supporting_text_tags(EVIDENCE)
    self.unset_evidence.setParseAction(self.handle_unset_evidence)

    self.unset_citation = Suppress(BEL_KEYWORD_CITATION)
    self.unset_citation.setParseAction(self.handle_unset_citation)

    self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP)
    self.unset_statement_group.setParseAction(self.handle_unset_statement_group)

    self.unset_list = delimited_unquoted_list('values')
    self.unset_list.setParseAction(self.handle_unset_list)

    # unset_all is defined outside this method; copy it before binding, as
    # is already done for the set_* stubs above.
    self.unset_all = unset_all().setParseAction(self.handle_unset_all)

    self.set_statements = set_tag + MatchFirst([
        self.set_statement_group,
        self.set_citation,
        self.set_evidence,
        self.set_command,
        self.set_command_list,
    ])

    self.unset_statements = unset_tag + MatchFirst([
        self.unset_all,
        self.unset_citation,
        self.unset_evidence,
        self.unset_statement_group,
        self.unset_command,
        self.unset_list,
    ])

    self.language = self.set_statements | self.unset_statements

    super(ControlParser, self).__init__(self.language)