def get_match_first(lits, parseAction=None):
    # Start from an empty MatchFirst and append each alternative in order.
    el = MatchFirst(NoMatch())
    for lit in lits:
        el |= lit
    if parseAction:
        el.setParseAction(parseAction)
    return el
def define_simple_literals(literal_list, parseAction=None):
    l = MatchFirst([CaselessKeyword(k) for k in literal_list])
    if parseAction:
        l = l.setParseAction(parseAction)
    return l
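# Usage sketch (illustrative, not from the original source): exercising the two
# helpers above with hypothetical literal lists. Assumes the module already
# imports MatchFirst/NoMatch (used by get_match_first); CaselessKeyword is
# imported here for the demo.
def _demo_literal_helpers():
    from pyparsing import CaselessKeyword

    # MatchFirst tries alternatives in order; CaselessKeyword matches
    # case-insensitively but returns the keyword as originally defined.
    units = get_match_first([CaselessKeyword('km'), CaselessKeyword('mi')])
    assert list(units.parseString('KM')) == ['km']

    # The parse action replaces the matched token with a boolean.
    flags = define_simple_literals(
        ['true', 'false'],
        parseAction=lambda t: [t[0].lower() == 'true'])
    assert list(flags.parseString('TRUE')) == [True]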
def _get_requirements_build_gradle(path: str) -> list:
    """
    Get list of requirements from a Gradle project.

    Files supported are build.gradle

    :param path: Project path
    """
    reqs = []
    for file_path in full_paths_in_dir(path):
        if not file_path.endswith('build.gradle'):
            continue

        with open(file_path, encoding='latin-1') as file_fd:
            file_content = file_fd.read()

        string = MatchFirst([quotedString('"'), quotedString("'")])
        string.setParseAction(lambda x: [x[0][1:-1]])

        grammars: list = [
            # compile 'group:name:version'
            Suppress(Keyword('compile') + Optional('(')) +
            string.copy()('package'),
            # compile(group: '...', name: '...', version: '...')
            Suppress(Keyword('compile') + Optional('(')) +
            Suppress(Keyword('group') + ':') + string.copy()('group') +
            Suppress(',') +
            Suppress(Keyword('name') + ':') + string.copy()('name') +
            Suppress(',') +
            Suppress(Keyword('version') + ':') + string.copy()('version'),
        ]

        for grammar in grammars:
            for tokens, _, _ in grammar.scanString(file_content):
                matches = tokens.asDict()
                if 'package' in matches:
                    if ':' in matches['package']:
                        name, version = matches['package'].rsplit(':', 1)
                    else:
                        name, version = matches['package'], None
                    reqs.append((file_path, name, version))
                else:
                    reqs.append(
                        (file_path,
                         f"{matches['group']}:{matches['name']}",
                         matches['version']))
                    reqs.append(
                        (file_path, matches['group'], matches['version']))
    return reqs
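# Usage sketch (illustrative, not from the original source): running the same
# compile-line grammar over an in-memory build.gradle fragment, so the function
# above can be understood without the surrounding project's full_paths_in_dir
# helper. The dependency coordinates are made up.
def _demo_gradle_grammar():
    from pyparsing import Keyword, MatchFirst, Optional, Suppress, quotedString

    content = "compile 'org.slf4j:slf4j-api:1.7.25'"
    string = MatchFirst([quotedString('"'), quotedString("'")])
    string.setParseAction(lambda x: [x[0][1:-1]])  # strip surrounding quotes
    grammar = Suppress(Keyword('compile') + Optional('(')) + string('package')
    for tokens, _, _ in grammar.scanString(content):
        assert tokens['package'] == 'org.slf4j:slf4j-api:1.7.25'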
def __init__(self):
    self.json_query = {'query': {}, 'and': [], 'or': []}
    self.tokens = None
    #--------------------------------------------------------------------------------------
    # <integer> ::= 0-9
    # <double>  ::= 0-9 ('.' 0-9)
    # <number>  ::= <integer> | <double>
    #--------------------------------------------------------------------------------------
    integer = Regex(r'-?[0-9]+')  # Word matches space for some reason
    double = Regex(r'-?[0-9]+\.?[0-9]*')
    number = double | integer
    #--------------------------------------------------------------------------------------
    # <python-string>   ::= (String surrounded by double-quotes)
    # <wildcard-string> ::= <python-string>
    # <limited-string>  ::= '"' a..z A..Z 0..9 _ . '"' (alphanumerics plus . and _, surrounded by double quotes)
    # <field>           ::= <limited-string> | "*"
    # <coords>          ::= "LAT" <number> "LON" <number>
    # <units>           ::= ('km' | 'mi' | 'nm')
    # <distance>        ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?)
    #--------------------------------------------------------------------------------------
    python_string = quotedString.setParseAction(removeQuotes)
    wildcard_string = python_string
    limited_string = Regex(
        r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes)
    field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes)
    coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number
    units = CaselessLiteral('km') | CaselessLiteral('mi')
    distance = number + units
    distance.setParseAction(
        lambda x: self.frame.update({'dist': float(x[0]), 'units': x[1]}))
    #--------------------------------------------------------------------------------------
    # Date
    #--------------------------------------------------------------------------------------
    date = python_string
    #--------------------------------------------------------------------------------------
    # <query-filter>  ::= "FILTER" <python-string>
    # <index-name>    ::= <python-string>
    # <resource-id>   ::= '"' a..z A..Z 0..9 $ _ - '"' (alphanumerics surrounded by double quotes)
    # <collection-id> ::= <resource-id>
    #--------------------------------------------------------------------------------------
    query_filter = CaselessLiteral("FILTER") + python_string
    # Add the filter to the frame object
    query_filter.setParseAction(lambda x: self.frame.update({'filter': x[1]}))
    index_name = MatchFirst(python_string)
    # Add the index to the frame object
    index_name.setParseAction(lambda x: self.frame.update({'index': x[0]}))
    resource_id = Regex(
        r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')').setParseAction(removeQuotes)
    collection_id = resource_id
    #--------------------------------------------------------------------------------------
    # <from-statement> ::= "FROM" <number>
    # <to-statement>   ::= "TO" <number>
    #--------------------------------------------------------------------------------------
    from_statement = CaselessLiteral("FROM") + number
    from_statement.setParseAction(lambda x: self.frame.update({'from': x[1]}))
    to_statement = CaselessLiteral("TO") + number
    to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))
    #--------------------------------------------------------------------------------------
    # <date-from-statement> ::= "FROM" <date>
    # <date-to-statement>   ::= "TO" <date>
    #--------------------------------------------------------------------------------------
    date_from_statement = CaselessLiteral("FROM") + date
    date_from_statement.setParseAction(lambda x: self.frame.update({'from': x[1]}))
    date_to_statement = CaselessLiteral("TO") + date
    date_to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]}))
    #--------------------------------------------------------------------------------------
    # <time-query> ::= "TIME FROM" <date> "TO" <date>
    #--------------------------------------------------------------------------------------
    time_query = CaselessLiteral("TIME") + Optional(date_from_statement) + Optional(date_to_statement)
    time_query.setParseAction(lambda x: self.time_frame())
    # time.mktime(dateutil.parser.parse(x[2])), 'to':time.mktime(dateutil.parser.parse(x[4]))}}))
    #--------------------------------------------------------------------------------------
    # <time-bounds> ::= "TIMEBOUNDS" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    time_bounds = CaselessLiteral("TIMEBOUNDS") + date_from_statement + date_to_statement
    time_bounds.setParseAction(lambda x: self.time_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement>
    #--------------------------------------------------------------------------------------
    vertical_bounds = CaselessLiteral("VERTICAL") + from_statement + to_statement
    vertical_bounds.setParseAction(lambda x: self.vertical_bounds_frame())
    #--------------------------------------------------------------------------------------
    # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>]
    #--------------------------------------------------------------------------------------
    range_query = CaselessLiteral("VALUES") + Optional(from_statement) + Optional(to_statement)
    # Add the range to the frame object
    range_query.setParseAction(lambda x: self.range_frame())
    #--------------------------------------------------------------------------------------
    # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords>
    # <geo-bbox>     ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords>
    #--------------------------------------------------------------------------------------
    geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords
    geo_distance.setParseAction(
        lambda x: self.frame.update({'lat': float(x[5]), 'lon': float(x[7])}))
    geo_bbox = (CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords +
                CaselessLiteral("BOTTOM-RIGHT") + coords)
    geo_bbox.setParseAction(lambda x: self.frame.update({
        'top_left': [float(x[5]), float(x[3])],
        'bottom_right': [float(x[10]), float(x[8])],
    }))
    #--------------------------------------------------------------------------------------
    # <field-query> ::= <wildcard-string>
    # <term-query>  ::= "IS" <field-query>
    # <fuzzy-query> ::= "LIKE" <field-query>
    # <match-query> ::= "MATCH" <field-query>
    # <geo-query>   ::= "GEO" ( <geo-distance> | <geo-bbox> )
    #--------------------------------------------------------------------------------------
    field_query = wildcard_string
    term_query = CaselessLiteral("IS") + field_query
    term_query.setParseAction(lambda x: self.frame.update({'value': x[1]}))
    geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox)
    fuzzy_query = CaselessLiteral("LIKE") + field_query
    fuzzy_query.setParseAction(lambda x: self.frame.update({'fuzzy': x[1]}))
    match_query = CaselessLiteral("MATCH") + field_query
    match_query.setParseAction(lambda x: self.frame.update({'match': x[1]}))
    #--------------------------------------------------------------------------------------
    # <limit-parameter>  ::= "LIMIT" <integer>
    # <depth-parameter>  ::= "DEPTH" <integer>
    # <order-parameter>  ::= "ORDER" "BY" <limited-string>
    # <offset-parameter> ::= "SKIP" <integer>
    # <query-parameter>  ::= <order-parameter> | <limit-parameter>
    #--------------------------------------------------------------------------------------
    limit_parameter = CaselessLiteral("LIMIT") + integer
    limit_parameter.setParseAction(lambda x: self.json_query.update({'limit': int(x[1])}))
    depth_parameter = CaselessLiteral("DEPTH") + integer
    depth_parameter.setParseAction(lambda x: self.frame.update({'depth': int(x[1])}))
    order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string
    order_parameter.setParseAction(lambda x: self.json_query.update({'order': {x[2]: 'asc'}}))
    offset_parameter = CaselessLiteral("SKIP") + integer
    offset_parameter.setParseAction(lambda x: self.json_query.update({'skip': int(x[1])}))
    query_parameter = limit_parameter | order_parameter | offset_parameter
    #--------------------------------------------------------------------------------------
    # <search-query>      ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> |
    #                         <match-query> | <time-query> | <time-bounds> | <vertical-bounds> |
    #                         <geo-query>) "FROM" <index-name> [<query-parameter>]*
    # <collection-query>  ::= "IN <collection-id>"
    # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ]
    # <owner-query>       ::= "HAS" <resource-id> [ <depth-parameter> ]
    # <query>             ::= <search-query> | <association-query> | <collection-query> | <owner-query>
    #--------------------------------------------------------------------------------------
    search_query = CaselessLiteral("SEARCH") + field + (
        range_query | term_query | fuzzy_query | match_query |
        vertical_bounds | time_bounds | time_query | geo_query
    ) + CaselessLiteral("FROM") + index_name
    # Add the field to the frame object
    search_query.setParseAction(lambda x: self.frame.update({'field': x[1]}))
    collection_query = CaselessLiteral("IN") + collection_id
    collection_query.setParseAction(lambda x: self.frame.update({'collection': x[1]}))
    association_query = (CaselessLiteral("BELONGS") + CaselessLiteral("TO") +
                         resource_id + Optional(depth_parameter))
    # Add the association to the frame object
    association_query.setParseAction(lambda x: self.frame.update({'association': x[2]}))
    owner_query = CaselessLiteral("HAS") + resource_id + Optional(depth_parameter)
    owner_query.setParseAction(lambda x: self.frame.update({'owner': x[1]}))
    query = search_query | association_query | collection_query | owner_query
    #--------------------------------------------------------------------------------------
    # <primary-query> ::= <query> [<query-filter>]
    # <atom>          ::= <query>
    # <intersection>  ::= "AND" <atom>
    # <union>         ::= "OR" <atom>
    # <sentence>      ::= <primary-query> [<intersection>]* [<union>]*
    #--------------------------------------------------------------------------------------
    primary_query = query + Optional(query_filter)
    # Set the primary query on the json_query to the frame and clear the frame
    primary_query.setParseAction(lambda x: self.push_frame())
    atom = query
    intersection = CaselessLiteral("AND") + atom
    # Add an AND operation to the json_query and clear the frame
    intersection.setParseAction(lambda x: self.and_frame())
    union = CaselessLiteral("OR") + atom
    # Add an OR operation to the json_query and clear the frame
    union.setParseAction(lambda x: self.or_frame())

    self.sentence = primary_query + (intersection ^ union) * (0, None) + query_parameter * (0, None)
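# Usage sketch (illustrative, not from the original source): this __init__ is
# shown without its class. Assuming it belongs to a class such as
# QueryLanguage (name hypothetical) that also defines self.frame and the
# push_frame/and_frame/or_frame/time_frame/... helpers it calls, a query in
# this DSL would be parsed roughly like so:
#
#   ql = QueryLanguage()
#   ql.tokens = ql.sentence.parseString(
#       'SEARCH "model" IS "abc*" FROM "models" ORDER BY "ts" LIMIT 30')
#   print(ql.json_query)  # accumulated by the parse actions above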
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""

    def __init__(
        self,
        graph,
        namespace_to_term: Optional[Mapping[str, Mapping[str, str]]] = None,
        namespace_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_term: Optional[Mapping[str, Set[str]]] = None,
        annotation_to_pattern: Optional[Mapping[str, Pattern]] = None,
        annotation_to_local: Optional[Mapping[str, Set[str]]] = None,
        allow_naked_names: bool = False,
        allow_nested: bool = False,
        disallow_unqualified_translocations: bool = False,
        citation_clearing: bool = True,
        skip_validation: bool = False,
        autostreamline: bool = True,
        required_annotations: Optional[List[str]] = None,
    ) -> None:
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_to_term: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param namespace_to_pattern: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param annotation_to_term: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_pattern: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param annotation_to_local: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :param allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param allow_nested: If true, turn off nested statement failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param disallow_unqualified_translocations: If true, disallow translocations without TO and FROM clauses.
        :param citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param autostreamline: Should the parser be streamlined on instantiation?
        :param required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.control_parser = ControlParser(
                annotation_to_term=annotation_to_term,
                annotation_to_pattern=annotation_to_pattern,
                annotation_to_local=annotation_to_local,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )
            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_to_term=namespace_to_term,
                namespace_to_pattern=namespace_to_pattern,
            )

        self.control_parser.get_line_number = self.get_line_number
        self.identifier_parser.get_line_number = self.get_line_number

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_
        self.pmod = get_protein_modification_language(self.identifier_parser.identifier_qualified)

        #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = pyparsing.Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.general_abundance = general_abundance_tags + nest(ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: `2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_
        self.gene = gene_tag + nest(
            MatchFirst([
                self.gene_fusion,
                self.gene_fusion_legacy,
                self.gene_modified,
            ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: `2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(
                Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))(VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: `2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_
        self.protein = protein_tag + nest(
            MatchFirst([
                self.protein_fusion,
                self.protein_fusion_legacy,
                self.protein_modified,
            ]) + opt_location)

        self.rna_modified = ungrouped_identifier + pyparsing.Optional(
            WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: `2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_
        self.rna = rna_tag + nest(
            MatchFirst([
                self.rna_fusion,
                self.rna_fusion_legacy,
                self.rna_modified,
            ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance, self.gene, self.mirna, self.protein, self.rna,
        ])

        #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_
        self.complex_singleton = complex_tag + nest(ungrouped_identifier + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location)

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0
        molecular_activity_default = oneOf(list(language.activity_labels)).setParseAction(
            handle_molecular_activity_default)

        #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language)

        # 2.3 Process Functions

        #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_
        self.biological_process = biological_process_tag + nest(ungrouped_identifier)

        #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            pyparsing.Optional(WCW + Group(self.molecular_activity)(EFFECT)))

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: `2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(
            Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(from_loc + WCW + to_loc)(EFFECT))

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) + WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT))
        self.translocation_legacy.addParseAction(handle_legacy_tloc)

        self.translocation_unqualified = nest(Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(self.handle_translocation_illegal)

        #: `2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified,
            self.translocation_standard,
            self.translocation_legacy,
        ])

        #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_
        self.degradation = degradation_tags + nest(Group(self.simple_abundance)(TARGET))

        #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_
        self.reactants = Suppress(REACTANTS) + nest(delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(delimitedList(Group(self.simple_abundance)))
        self.reaction = reaction_tags + nest(
            Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion,
            self.cell_surface_expression,
            self.translocation,
            self.degradation,
            self.reaction,
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst([self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term, MatchFirst(self.bel_to_bel_relations), self.bel_term)

        # Mixed Relationships

        #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_
        self.rate_limit = triple(
            MatchFirst([self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process)

        #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process)

        #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_
        self.has_member = triple(self.abundance, has_member_tag, self.abundance)

        #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_
        self.abundance_list = Suppress('list') + nest(delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag, self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag, self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag,
            self.abundance)

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags, self.abundance)

        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags, self.abundance)

        self.relation = MatchFirst([
            self.bel_to_bel,
            # self.has_member,
            # self.has_component,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
            # self.has_variant_relation,
            # self.part_of_reaction,
        ])
        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
            self.has_variant_relation,
            self.part_of_reaction,
        ])
        self.unqualified_relation.setParseAction(self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag,
            decreases_tag,
            directly_decreases_tag,
            directly_increases_tag,
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term)))
        self.nested_causal_relationship.setParseAction(self.handle_nested_relation)

        self.label_relationship = And([
            Group(self.bel_term)(SUBJECT),
            Suppress('labeled'),
            quote(OBJECT),
        ])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language, streamline=autostreamline)

    @property
    def _namespace_dict(self) -> Mapping[str, Mapping[str, str]]:
        """Get the dictionary of {namespace: {name: encoding}} stored in the internal identifier parser."""
        return self.identifier_parser.namespace_to_terms

    @property
    def _allow_naked_names(self) -> bool:
        """Return if naked names should be parsed (``True``), or if errors should be thrown (``False``)."""
        return self.identifier_parser.allow_naked_names

    def get_annotations(self) -> Dict:
        """Get the current annotations in this parser."""
        return self.control_parser.get_annotations()

    def clear(self):
        """Clear the graph and all control parser data (current citation, annotations, and statement group)."""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line: str, position: int, tokens: ParseResults):
        """Handle nested statements.

        If :code:`allow_nested` is False, raises a ``NestedRelationWarning``.

        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.get_line_number(), line, position)

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[SUBJECT],
                RELATION: tokens[RELATION],
                OBJECT: tokens[OBJECT][SUBJECT],
            })

        self._handle_relation_harness(
            line, position, {
                SUBJECT: tokens[OBJECT][SUBJECT],
                RELATION: tokens[OBJECT][RELATION],
                OBJECT: tokens[OBJECT][OBJECT],
            })
        return tokens

    def check_function_semantics(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Raise an exception if the function used on the tokens is wrong.

        :raises: InvalidFunctionSemantic
        """
        if not self._namespace_dict or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        if namespace in self.identifier_parser.namespace_to_pattern:
            return tokens

        if self._allow_naked_names and tokens[NAMESPACE] == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(
            itt.chain.from_iterable(
                belns_encodings.get(k, set())
                for k in self._namespace_dict[namespace][name]))

        if not valid_functions:
            raise InvalidEntity(self.get_line_number(), line, position, namespace, name)

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(
                self.get_line_number(), line, position,
                tokens[FUNCTION], namespace, name, valid_functions)

        return tokens

    def handle_term(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle BEL terms (the subject and object of BEL relations)."""
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens: ParseResults, relation: str) -> ParseResults:
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_dsl, child_node_dsl, relation)

        return tokens

    def handle_has_members(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``."""
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations,
                                   subject_modifier, object_modifier) -> str:
        """Add a qualified edge from the internal aspects of the parser."""
        return self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.get_line_number()})

    def _add_qualified_edge(self, u, v, relation, annotations,
                            subject_modifier, object_modifier) -> str:
        """Add an edge, then add the opposite direction edge if it should."""
        sha512 = self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation in TWO_WAY_RELATIONS:
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

        return sha512

    def _handle_relation(self, tokens: ParseResults) -> str:
        """Handle a relation."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        annotations = {
            annotation_name: (
                {ae: True for ae in annotation_entry}
                if isinstance(annotation_entry, set)
                else {annotation_entry: True}
            )
            for annotation_name, annotation_entry in self.control_parser.annotations.items()
        }

        return self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line: str, position: int,
                                 tokens: Union[ParseResults, Dict]) -> ParseResults:
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.get_line_number(), line, position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.get_line_number(), line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(
                self.get_line_number(), line, position, missing_required_annotations)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, _, __, tokens: ParseResults) -> ParseResults:
        """Handle unqualified relations."""
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        relation = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl, relation)
        return tokens

    def handle_label_relation(self, line: str, position: int, tokens: ParseResults) -> ParseResults:
        """Handle statements like ``p(X) label "Label for X"``.

        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.get_line_number(),
                line=line,
                position=position,
                node=self.graph.node,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description)

        self.graph.set_node_description(subject_node_dsl, description)
        return tokens

    def ensure_node(self, tokens: ParseResults) -> BaseEntity:
        """Turn parsed tokens into a canonical node and make sure it's in the graph."""
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node)
        return node

    def handle_translocation_illegal(self, line: str, position: int, tokens: ParseResults) -> None:
        """Handle a malformed translocation."""
        raise MalformedTranslocationWarning(self.get_line_number(), line, position, tokens)
class BelParser(BaseParser): """Build a parser backed by a given dictionary of namespaces""" def __init__(self, graph, namespace_dict=None, annotation_dict=None, namespace_regex=None, annotation_regex=None, allow_naked_names=False, allow_nested=False, allow_unqualified_translocations=False, citation_clearing=True, no_identifier_validation=False, autostreamline=True, required_annotations=None): """ :param pybel.BELGraph graph: The BEL Graph to use to store the network :param namespace_dict: A dictionary of {namespace: {name: encoding}}. Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser` :type namespace_dict: Optional[dict[str,dict[str,str]]] :param annotation_dict: A dictionary of {annotation: set of values}. Delegated to :class:`pybel.parser.ControlParser` :rype annotation_dict: Optional[dict[str,set[str]]] :param namespace_regex: A dictionary of {namespace: regular expression strings}. Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser` :type namespace_regex: Optional[dict[str,str]] :param annotation_regex: A dictionary of {annotation: regular expression strings}. Delegated to :class:`pybel.parser.ControlParser` :type annotation_regex: Optional[dict[str,str]] :param bool allow_naked_names: If true, turn off naked namespace failures. Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser` :param bool allow_nested: If true, turn off nested statement failures. Delegated to :class:`pybel.parser.parse_identifier.IdentifierParser` :param bool allow_unqualified_translocations: If true, allow translocations without TO and FROM clauses. :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? Delegated to :class:`pybel.parser.ControlParser` :param bool autostreamline: Should the parser be streamlined on instantiation? 
:param Optional[list[str]] required_annotations: Optional list of required annotations """ self.graph = graph self.allow_nested = allow_nested self.control_parser = ControlParser( annotation_dict=annotation_dict, annotation_regex=annotation_regex, citation_clearing=citation_clearing, required_annotations=required_annotations, ) if no_identifier_validation: self.identifier_parser = IdentifierParser( allow_naked_names=allow_naked_names, ) else: self.identifier_parser = IdentifierParser( allow_naked_names=allow_naked_names, namespace_dict=namespace_dict, namespace_regex=namespace_regex, ) identifier = Group(self.identifier_parser.language)(IDENTIFIER) ungrouped_identifier = self.identifier_parser.language # 2.2 Abundance Modifier Functions #: `2.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_protein_modifications>`_ self.pmod = ProteinModificationParser(self.identifier_parser).language #: `2.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_variant_var>`_ self.variant = VariantParser().language #: `2.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_proteolytic_fragments>`_ self.fragment = FragmentParser().language #: `2.2.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellular_location>`_ self.location = LocationParser(self.identifier_parser).language opt_location = Optional(WCW + self.location) #: DEPRECATED: `2.2.X Amino Acid Substitutions <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_amino_acid_substitutions>`_ self.psub = ProteinSubstitutionParser().language #: DEPRECATED: `2.2.X Sequence Variations <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_sequence_variations>`_ self.gsub = GeneSubstitutionParser().language #: DEPRECATED #: `Truncated proteins <http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_truncated_proteins>`_ self.trunc = TruncationParser().language #: PyBEL BEL Specification variant self.gmod = GeneModificationParser( ).language # FIXME add identifier parser to this # 2.6 Other Functions #: `2.6.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_fusion_fus>`_ self.fusion = FusionParser(self.identifier_parser).language # 2.1 Abundance Functions #: `2.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_ self.general_abundance = general_abundance_tags + nest( ungrouped_identifier + opt_location) self.gene_modified = ungrouped_identifier + Optional( WCW + delimitedList(Group(self.variant | self.gsub | self.gmod)) (VARIANTS)) self.gene_fusion = Group(self.fusion)(FUSION) self.gene_fusion_legacy = Group(build_legacy_fusion(identifier, 'c'))(FUSION) self.gene = gene_tag + nest( MatchFirst([ self.gene_fusion, self.gene_fusion_legacy, self.gene_modified ]) + opt_location) """`2.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA>`_""" self.mirna_modified = ungrouped_identifier + Optional( WCW + delimitedList(Group(self.variant))(VARIANTS)) + opt_location self.mirna = mirna_tag + nest(self.mirna_modified) """`2.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA>`_""" self.protein_modified = ungrouped_identifier + Optional( WCW + delimitedList( Group( MatchFirst([ self.pmod, self.variant, self.fragment, self.psub, self.trunc ])))(VARIANTS)) self.protein_fusion = Group(self.fusion)(FUSION) self.protein_fusion_legacy = 
Group(build_legacy_fusion( identifier, 'p'))(FUSION) self.protein = protein_tag + nest( MatchFirst([ self.protein_fusion, self.protein_fusion_legacy, self.protein_modified, ]) + opt_location) """`2.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA>`_""" self.rna_modified = ungrouped_identifier + Optional( WCW + delimitedList(Group(self.variant))(VARIANTS)) self.rna_fusion = Group(self.fusion)(FUSION) self.rna_fusion_legacy = Group(build_legacy_fusion(identifier, 'r'))(FUSION) self.rna = rna_tag + nest( MatchFirst([ self.rna_fusion, self.rna_fusion_legacy, self.rna_modified, ]) + opt_location) """`2.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA>`_""" self.single_abundance = MatchFirst([ self.general_abundance, self.gene, self.mirna, self.protein, self.rna ]) #: `2.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA>`_ self.complex_singleton = complex_tag + nest(ungrouped_identifier + opt_location) self.complex_list = complex_tag + nest( delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location) self.complex_abundances = self.complex_list | self.complex_singleton # Definition of all simple abundances that can be used in a composite abundance self.simple_abundance = self.complex_abundances | self.single_abundance self.simple_abundance.setParseAction(self.check_function_semantics) #: `2.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA>`_ self.composite_abundance = composite_abundance_tag + nest( delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location) self.abundance = self.simple_abundance | self.composite_abundance # 2.4 Process Modifier Function # backwards compatibility with BEL v1.0 molecular_activity_default = oneOf(list( language.activity_labels)).setParseAction( handle_molecular_activity_default) #: `2.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA>`_ self.molecular_activity = molecular_activity_tags + nest( molecular_activity_default | self.identifier_parser.language) # 2.3 Process Functions #: `2.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp>`_ self.biological_process = biological_process_tag + nest( ungrouped_identifier) #: `2.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path>`_ self.pathology = pathology_tag + nest(ungrouped_identifier) self.bp_path = self.biological_process | self.pathology self.bp_path.setParseAction(self.check_function_semantics) self.activity_standard = activity_tag + nest( Group(self.simple_abundance)(TARGET) + Optional(WCW + Group(self.molecular_activity)(EFFECT))) activity_legacy_tags = oneOf(language.activities)(MODIFIER) self.activity_legacy = activity_legacy_tags + nest( Group(self.simple_abundance)(TARGET)) self.activity_legacy.setParseAction(handle_activity_legacy) self.activity = self.activity_standard | self.activity_legacy """`2.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity>`_""" self.process = self.bp_path | self.activity # 2.5 Transformation Functions from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC)) to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC)) self.cell_secretion = cell_secretion_tag + nest( Group(self.simple_abundance)(TARGET)) self.cell_surface_expression = cell_surface_expression_tag + nest( Group(self.simple_abundance)(TARGET)) 
self.translocation_standard = nest( Group(self.simple_abundance)(TARGET) + WCW + Group(from_loc + WCW + to_loc)(EFFECT)) self.translocation_legacy = nest( Group(self.simple_abundance)(TARGET) + WCW + Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT)) self.translocation_legacy.addParseAction(handle_legacy_tloc) self.translocation_unqualified = nest( Group(self.simple_abundance)(TARGET)) if not allow_unqualified_translocations: self.translocation_unqualified.setParseAction( self.handle_translocation_illegal) self.translocation = translocation_tag + MatchFirst([ self.translocation_unqualified, self.translocation_standard, self.translocation_legacy ]) """`2.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocations>`_""" #: `2.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg>`_ self.degradation = degradation_tags + nest( Group(self.simple_abundance)(TARGET)) #: `2.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn>`_ self.reactants = Suppress(REACTANTS) + nest( delimitedList(Group(self.simple_abundance))) self.products = Suppress(PRODUCTS) + nest( delimitedList(Group(self.simple_abundance))) self.reaction = reaction_tags + nest( Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS)) self.transformation = MatchFirst([ self.cell_secretion, self.cell_surface_expression, self.translocation, self.degradation, self.reaction ]) # 3 BEL Relationships self.bel_term = MatchFirst( [self.transformation, self.process, self.abundance]).streamline() # BEL Term to BEL Term Relationships #: `3.1.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xincreases>`_ increases_tag = oneOf(['->', '→', 'increases' ]).setParseAction(replaceWith(INCREASES)) #: `3.1.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdIncreases>`_ directly_increases_tag = one_of_tags(['=>', '⇒', 'directlyIncreases'], DIRECTLY_INCREASES) #: `3.1.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xdecreases>`_ decreases_tag = one_of_tags(['-|', 'decreases'], DECREASES) #: `3.1.4 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XdDecreases>`_ directly_decreases_tag = one_of_tags(['=|', 'directlyDecreases'], DIRECTLY_DECREASES) #: `3.5.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_analogous>`_ analogous_tag = one_of_tags(['analogousTo'], ANALOGOUS_TO) #: `3.1.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xcnc>`_ causes_no_change_tag = one_of_tags(['cnc', 'causesNoChange'], CAUSES_NO_CHANGE) #: `3.1.7 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_regulates_reg>`_ regulates_tag = one_of_tags(['reg', 'regulates'], REGULATES) #: `3.2.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XnegCor>`_ negative_correlation_tag = one_of_tags(['neg', 'negativeCorrelation'], NEGATIVE_CORRELATION) #: `3.2.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XposCor>`_ positive_correlation_tag = one_of_tags(['pos', 'positiveCorrelation'], POSITIVE_CORRELATION) #: `3.2.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xassociation>`_ association_tag = one_of_tags(['--', 'association'], ASSOCIATION) #: `3.3.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_orthologous>`_ orthologous_tag = 
one_of_tags(['orthologous'], ORTHOLOGOUS) #: `3.4.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_isa>`_ is_a_tag = Keyword(IS_A) #: PyBEL Variants equivalent_tag = one_of_tags(['eq', EQUIVALENT_TO], EQUIVALENT_TO) partof_tag = Keyword(PART_OF) self.bel_to_bel_relations = [ association_tag, increases_tag, decreases_tag, positive_correlation_tag, negative_correlation_tag, causes_no_change_tag, orthologous_tag, is_a_tag, equivalent_tag, partof_tag, directly_increases_tag, directly_decreases_tag, analogous_tag, regulates_tag, ] self.bel_to_bel = triple(self.bel_term, MatchFirst(self.bel_to_bel_relations), self.bel_term) # Mixed Relationships #: `3.1.5 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof>`_ rate_limit_tag = oneOf(['rateLimitingStepOf']).setParseAction( replaceWith(RATE_LIMITING_STEP_OF)) self.rate_limit = triple( MatchFirst( [self.biological_process, self.activity, self.transformation]), rate_limit_tag, self.biological_process) #: `3.4.6 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_subprocessof>`_ subprocess_of_tag = oneOf(['subProcessOf']).setParseAction( replaceWith(SUBPROCESS_OF)) self.subprocess_of = triple( MatchFirst([self.process, self.activity, self.transformation]), subprocess_of_tag, self.process) #: `3.3.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_transcribedto>`_ transcribed_tag = oneOf([':>', 'transcribedTo' ]).setParseAction(replaceWith(TRANSCRIBED_TO)) self.transcribed = triple(self.gene, transcribed_tag, self.rna) #: `3.3.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translatedto>`_ translated_tag = oneOf(['>>', 'translatedTo' ]).setParseAction(replaceWith(TRANSLATED_TO)) self.translated = triple(self.rna, translated_tag, self.protein) #: `3.4.1 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmember>`_ has_member_tag = oneOf(['hasMember' ]).setParseAction(replaceWith(HAS_MEMBER)) self.has_member = triple(self.abundance, has_member_tag, self.abundance) #: `3.4.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hasmembers>`_ self.abundance_list = Suppress('list') + nest( delimitedList(Group(self.abundance))) has_members_tag = oneOf(['hasMembers']) self.has_members = triple(self.abundance, has_members_tag, self.abundance_list) self.has_members.setParseAction(self.handle_has_members) has_components_tag = oneOf(['hasComponents']) self.has_components = triple(self.abundance, has_components_tag, self.abundance_list) self.has_components.setParseAction(self.handle_has_components) self.has_list = self.has_members | self.has_components # `3.4.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_hascomponent>`_ has_component_tag = oneOf(['hasComponent']).setParseAction( replaceWith(HAS_COMPONENT)) self.has_component = triple( self.complex_abundances | self.composite_abundance, has_component_tag, self.abundance) #: `3.5.2 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biomarkerfor>`_ biomarker_tag = oneOf(['biomarkerFor' ]).setParseAction(replaceWith(BIOMARKER_FOR)) #: `3.5.3 <http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_prognosticbiomarkerfor>`_ prognostic_biomarker_tag = oneOf([ 'prognosticBiomarkerFor' ]).setParseAction(replaceWith(PROGONSTIC_BIOMARKER_FOR)) biomarker_tags = biomarker_tag | prognostic_biomarker_tag self.biomarker = triple(self.bel_term, 
biomarker_tags, self.process) has_variant_tags = oneOf(['hasVariant' ]).setParseAction(replaceWith(HAS_VARIANT)) self.has_variant_relation = triple(self.abundance, has_variant_tags, self.abundance) has_reactant_tags = oneOf(['hasReactant' ]).setParseAction(replaceWith(HAS_REACTANT)) has_product_tags = oneOf(['hasProduct' ]).setParseAction(replaceWith(HAS_PRODUCT)) part_of_reaction_tags = has_reactant_tags | has_product_tags self.part_of_reaction = triple(self.reaction, part_of_reaction_tags, self.abundance) self.relation = MatchFirst([ self.bel_to_bel, # self.has_member, # self.has_component, self.subprocess_of, self.rate_limit, self.biomarker, self.transcribed, self.translated, # self.has_variant_relation, # self.part_of_reaction, ]) self.relation.setParseAction(self._handle_relation_harness) self.unqualified_relation = MatchFirst([ self.has_member, self.has_component, self.has_variant_relation, self.part_of_reaction ]) self.unqualified_relation.setParseAction( self.handle_unqualified_relation) #: 3.1 Causal Relationships - nested. Not enabled by default. causal_relation_tags = MatchFirst([ increases_tag, decreases_tag, directly_decreases_tag, directly_increases_tag ]) self.nested_causal_relationship = triple( self.bel_term, causal_relation_tags, nest(triple(self.bel_term, causal_relation_tags, self.bel_term))) self.nested_causal_relationship.setParseAction( self.handle_nested_relation) self.label_relationship = And([ Group(self.bel_term)(SUBJECT), Suppress('labeled'), quote(OBJECT) ]) self.label_relationship.setParseAction(self.handle_label_relation) # has_members is handled differently from all other relations becuase it gets distrinbuted self.relation = MatchFirst([ self.has_list, self.nested_causal_relationship, self.relation, self.unqualified_relation, self.label_relationship, ]) self.singleton_term = (self.bel_term + StringEnd()).setParseAction( self.handle_term) self.statement = self.relation | self.singleton_term self.language = self.control_parser.language | self.statement self.language.setName('BEL') super(BelParser, self).__init__(self.language, streamline=autostreamline) @property def namespace_dict(self): """The dictionary of {namespace: {name: encoding}} stored in the internal identifier parser :rtype: dict[str,dict[str,str]] """ return self.identifier_parser.namespace_dict @property def namespace_regex(self): """The dictionary of {namespace keyword: compiled regular expression} stored the internal identifier parser :rtype: dict[str,re] """ return self.identifier_parser.namespace_regex_compiled @property def annotation_dict(self): """A dictionary of annotations to their set of values :rtype: dict[str,set[str]] """ return self.control_parser.annotation_dict @property def annotation_regex(self): """A dictionary of annotations defined by regular expressions {annotation keyword: string regular expression} :rtype: dict[str,str] """ return self.control_parser.annotation_regex @property def allow_naked_names(self): """Should naked names be parsed, or should errors be thrown? :rtype: bool """ return self.identifier_parser.allow_naked_names def get_annotations(self): """Get current annotations in this parser :rtype: dict """ return self.control_parser.get_annotations() def clear(self): """Clears the graph and all control parser data (current citation, annotations, and statement group)""" self.graph.clear() self.control_parser.clear() def handle_nested_relation(self, line, position, tokens): """Handles nested statements. If :code:`allow_nested` is False, raises a warning. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: NestedRelationWarning """ if not self.allow_nested: raise NestedRelationWarning(self.line_number, line, position) self._handle_relation_harness( line, position, { SUBJECT: tokens[SUBJECT], RELATION: tokens[RELATION], OBJECT: tokens[OBJECT][SUBJECT] }) self._handle_relation_harness( line, position, { SUBJECT: tokens[OBJECT][SUBJECT], RELATION: tokens[OBJECT][RELATION], OBJECT: tokens[OBJECT][OBJECT] }) return tokens def check_function_semantics(self, line, position, tokens): """Raises an exception if the function used on the tokens is wrong :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: InvalidFunctionSemantic """ if self.namespace_dict is None or NAMESPACE not in tokens: return tokens namespace, name = tokens[NAMESPACE], tokens[NAME] if namespace in self.namespace_regex: return tokens if self.allow_naked_names and tokens[ NAMESPACE] == DIRTY: # Don't check dirty names in lenient mode return tokens valid_functions = set( itt.chain.from_iterable( belns_encodings[k] for k in self.namespace_dict[namespace][name])) if tokens[FUNCTION] not in valid_functions: raise InvalidFunctionSemantic(self.line_number, line, position, tokens[FUNCTION], namespace, name, valid_functions) return tokens def handle_term(self, line, position, tokens): """Handles BEL terms (the subject and object of BEL relations) :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ self.ensure_node(tokens) return tokens def _handle_list_helper(self, tokens, relation): """Provides the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`""" parent_node_tuple, parent_node_attr = self.ensure_node(tokens[0]) for child_tokens in tokens[2]: child_node_tuple, child_node_attr = self.ensure_node(child_tokens) self.graph.add_unqualified_edge(parent_node_tuple, child_node_tuple, relation) return tokens def handle_has_members(self, line, position, tokens): """Handles list relations like ``p(X) hasMembers list(p(Y), p(Z), ...)`` :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ return self._handle_list_helper(tokens, HAS_MEMBER) def handle_has_components(self, line, position, tokens): """Handles list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)`` :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ return self._handle_list_helper(tokens, HAS_COMPONENT) def _add_qualified_edge_helper(self, u, v, relation, annotations, subject_modifier, object_modifier): """Adds a qualified edge from the internal aspects of the parser""" self.graph.add_qualified_edge( u, v, relation=relation, evidence=self.control_parser.evidence, citation=self.control_parser.citation.copy(), annotations=annotations, subject_modifier=subject_modifier, object_modifier=object_modifier, **{LINE: self.line_number}) def _add_qualified_edge(self, u, v, relation, annotations, subject_modifier, object_modifier): """Adds an edge, then adds the opposite direction edge if it should""" self._add_qualified_edge_helper( u, v, 
relation=relation, annotations=annotations, subject_modifier=subject_modifier, object_modifier=object_modifier, ) if relation in TWO_WAY_RELATIONS: self._add_qualified_edge_helper( v, u, relation=relation, annotations=annotations, object_modifier=subject_modifier, subject_modifier=object_modifier, ) def _handle_relation(self, tokens): """A policy in which all annotations are stored as sets, including single annotations :param pyparsing.ParseResult tokens: The tokens from PyParsing """ subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT]) object_node_tuple, _ = self.ensure_node(tokens[OBJECT]) subject_modifier = modifier_po_to_dict(tokens[SUBJECT]) object_modifier = modifier_po_to_dict(tokens[OBJECT]) annotations = { annotation_name: ({ae: True for ae in annotation_entry} if isinstance( annotation_entry, set) else { annotation_entry: True }) for annotation_name, annotation_entry in self.control_parser.annotations.items() } self._add_qualified_edge( subject_node_tuple, object_node_tuple, relation=tokens[RELATION], annotations=annotations, subject_modifier=subject_modifier, object_modifier=object_modifier, ) def _handle_relation_harness(self, line, position, tokens): """Handles BEL relations based on the policy specified on instantiation. Note: this can't be changed after instantiation! :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ if not self.control_parser.citation: raise MissingCitationException(self.line_number, line, position) if not self.control_parser.evidence: raise MissingSupportWarning(self.line_number, line, position) missing_required_annotations = self.control_parser.get_missing_required_annotations( ) if missing_required_annotations: raise MissingAnnotationWarning(self.line_number, line, position, missing_required_annotations) self._handle_relation(tokens) return tokens def handle_unqualified_relation(self, line, position, tokens): """Handles unqualified relations :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT]) object_node_tuple, _ = self.ensure_node(tokens[OBJECT]) rel = tokens[RELATION] self.graph.add_unqualified_edge(subject_node_tuple, object_node_tuple, rel) def handle_label_relation(self, line, position, tokens): """Handles statements like ``p(X) label "Label for X"`` :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RelabelWarning """ subject_node_tuple, _ = self.ensure_node(tokens[SUBJECT]) description = tokens[OBJECT] if self.graph.has_node_description(subject_node_tuple): raise RelabelWarning( line_number=self.line_number, line=line, position=position, node=self.graph.node, old_label=self.graph.get_node_description(subject_node_tuple), new_label=description) self.graph.set_node_description(subject_node_tuple, description) def ensure_node(self, tokens): """Turns parsed tokens into canonical node name and makes sure its in the graph :param pyparsing.ParseResult tokens: Tokens from PyParsing :return: A pair of the PyBEL node tuple and the PyBEL node data dictionary :rtype: tuple[tuple, dict] """ if MODIFIER in tokens: return self.ensure_node(tokens[TARGET]) node_attr_dict = po_to_dict(tokens) node_tuple = self.graph.add_node_from_data(node_attr_dict) return 
node_tuple, node_attr_dict def handle_translocation_illegal(self, line, position, tokens): raise MalformedTranslocationWarning(self.line_number, line, position, tokens)
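# Hedged sketch (not PyBEL's actual grammar): the token layout that
# _handle_list_helper above assumes is parent term at tokens[0], relation
# keyword at tokens[1], and a group of child terms at tokens[2]. The names
# term and list_relation below are hypothetical, for illustration only.
from pyparsing import CaselessKeyword, Group, Suppress, Word, alphanums, delimitedList

term = Group(Word(alphanums))
list_relation = (term + CaselessKeyword('hasMembers')
                 + Suppress('list(') + Group(delimitedList(term)) + Suppress(')'))

tokens = list_relation.parseString('X hasMembers list(Y, Z)')
# tokens[0] is the parent; iterating tokens[2] yields each child group
assert list(tokens[2][0]) == ['Y'] and list(tokens[2][1]) == ['Z']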
def __init__(self): self.json_query = {'query': {}, 'and': [], 'or': []} self.tokens = None #-------------------------------------------------------------------------------------- # <integer> ::= 0-9 # <double> ::= 0-9 ('.' 0-9) # <number> ::= <integer> | <double> #-------------------------------------------------------------------------------------- integer = Regex(r'-?[0-9]+') # Word matches space for some reason double = Regex(r'-?[0-9]+\.?[0-9]*') number = double | integer #-------------------------------------------------------------------------------------- # <python-string> ::= (String surrounded by double-quotes) # <wildcard-string> ::= <python-string> # <limited-string> ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by double quotes) # <field> ::= <limited-string> | "*" # <coords> ::= "LAT" <number> "LON" <number> # <units> ::= ('km' | 'mi' | 'nm') # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?) #-------------------------------------------------------------------------------------- python_string = quotedString.setParseAction(removeQuotes) wildcard_string = python_string limited_string = Regex( r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction( removeQuotes) field = limited_string ^ CaselessLiteral('"*"').setParseAction( removeQuotes) coords = CaselessLiteral("LAT") + number + CaselessLiteral( "LON") + number units = CaselessLiteral('km') | CaselessLiteral('mi') | CaselessLiteral('nm') distance = number + units distance.setParseAction(lambda x: self.frame.update({ 'dist': float(x[0]), 'units': x[1] })) #-------------------------------------------------------------------------------------- # Date #-------------------------------------------------------------------------------------- date = python_string #-------------------------------------------------------------------------------------- # <query-filter> ::= "FILTER" <python-string> # <index-name> ::= <python-string> # <resource-id> ::= '"' a..z A..Z 0..9 $ _ - '"' (alpha nums and $_- surrounded by double quotes) # <collection-id> ::= <resource-id> #-------------------------------------------------------------------------------------- query_filter = CaselessLiteral("FILTER") + python_string # Add the filter to the frame object query_filter.setParseAction( lambda x: self.frame.update({'filter': x[1]})) index_name = MatchFirst(python_string) # Add the index to the frame object index_name.setParseAction(lambda x: self.frame.update({'index': x[0]})) resource_id = Regex( r'("(?:[a-zA-Z0-9\$_-])*"|\'(?:[a-zA-Z0-9\$_-]*)\')' ).setParseAction(removeQuotes) collection_id = resource_id #-------------------------------------------------------------------------------------- # <from-statement> ::= "FROM" <number> # <to-statement> ::= "TO" <number> #-------------------------------------------------------------------------------------- from_statement = CaselessLiteral("FROM") + number from_statement.setParseAction( lambda x: self.frame.update({'from': x[1]})) to_statement = CaselessLiteral("TO") + number to_statement.setParseAction(lambda x: self.frame.update({'to': x[1]})) #-------------------------------------------------------------------------------------- # <date-from-statement> ::= "FROM" <date> # <date-to-statement> ::= "TO" <date> #-------------------------------------------------------------------------------------- date_from_statement = CaselessLiteral("FROM") + date date_from_statement.setParseAction( lambda x: self.frame.update({'from': x[1]})) date_to_statement = CaselessLiteral("TO") + date date_to_statement.setParseAction(
lambda x: self.frame.update({'to': x[1]})) #-------------------------------------------------------------------------------------- # <time-query> ::= "TIME" [<date-from-statement>] [<date-to-statement>] #-------------------------------------------------------------------------------------- time_query = CaselessLiteral("TIME") + Optional( date_from_statement) + Optional(date_to_statement) time_query.setParseAction(lambda x: self.time_frame()) #-------------------------------------------------------------------------------------- # <time-bounds> ::= "TIMEBOUNDS" <date-from-statement> <date-to-statement> #-------------------------------------------------------------------------------------- time_bounds = CaselessLiteral( "TIMEBOUNDS") + date_from_statement + date_to_statement time_bounds.setParseAction(lambda x: self.time_bounds_frame()) #-------------------------------------------------------------------------------------- # <vertical-bounds> ::= "VERTICAL" <from-statement> <to-statement> #-------------------------------------------------------------------------------------- vertical_bounds = CaselessLiteral( "VERTICAL") + from_statement + to_statement vertical_bounds.setParseAction(lambda x: self.vertical_bounds_frame()) #-------------------------------------------------------------------------------------- # <range-query> ::= "VALUES" [<from-statement>] [<to-statement>] #-------------------------------------------------------------------------------------- range_query = CaselessLiteral("VALUES") + Optional( from_statement) + Optional(to_statement) # Add the range to the frame object range_query.setParseAction(lambda x: self.range_frame()) #-------------------------------------------------------------------------------------- # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords> # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords> #-------------------------------------------------------------------------------------- geo_distance = CaselessLiteral( "DISTANCE") + distance + CaselessLiteral("FROM") + coords geo_distance.setParseAction( lambda x: self.frame.update({ 'lat': float(x[5]), 'lon': float(x[7]) })) geo_bbox = CaselessLiteral("BOX") + CaselessLiteral( "TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords geo_bbox.setParseAction(lambda x: self.frame.update({ 'top_left': [float(x[5]), float(x[3])], 'bottom_right': [float(x[10]), float(x[8])] })) #-------------------------------------------------------------------------------------- # <field-query> ::= <wildcard-string> # <term-query> ::= "IS" <field-query> # <fuzzy-query> ::= "LIKE" <field-query> # <match-query> ::= "MATCH" <field-query> # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> ) #-------------------------------------------------------------------------------------- field_query = wildcard_string term_query = CaselessLiteral("IS") + field_query term_query.setParseAction(lambda x: self.frame.update({'value': x[1]})) geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox) fuzzy_query = CaselessLiteral("LIKE") + field_query fuzzy_query.setParseAction( lambda x: self.frame.update({'fuzzy': x[1]})) match_query = CaselessLiteral("MATCH") + field_query match_query.setParseAction( lambda x: self.frame.update({'match': x[1]})) #-------------------------------------------------------------------------------------- # <limit-parameter> ::= "LIMIT" <integer> # <depth-parameter> ::= "DEPTH" <integer> # <order-parameter> ::= "ORDER" "BY"
<limited-string> # <offset-parameter> ::= "SKIP" <integer> # <query-parameter> ::= <limit-parameter> | <order-parameter> | <offset-parameter> #-------------------------------------------------------------------------------------- limit_parameter = CaselessLiteral("LIMIT") + integer limit_parameter.setParseAction( lambda x: self.json_query.update({'limit': int(x[1])})) depth_parameter = CaselessLiteral("DEPTH") + integer depth_parameter.setParseAction( lambda x: self.frame.update({'depth': int(x[1])})) order_parameter = CaselessLiteral("ORDER") + CaselessLiteral( "BY") + limited_string order_parameter.setParseAction( lambda x: self.json_query.update({'order': { x[2]: 'asc' }})) offset_parameter = CaselessLiteral("SKIP") + integer offset_parameter.setParseAction( lambda x: self.json_query.update({'skip': int(x[1])})) query_parameter = limit_parameter | order_parameter | offset_parameter #-------------------------------------------------------------------------------------- # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <fuzzy-query> | <match-query> | <time-query> | <time-bounds> | <vertical-bounds> | <geo-query>) "FROM" <index-name> # <collection-query> ::= "IN" <collection-id> # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ] # <owner-query> ::= "HAS" <resource-id> [ <depth-parameter> ] # <query> ::= <search-query> | <association-query> | <collection-query> | <owner-query> #-------------------------------------------------------------------------------------- search_query = CaselessLiteral("SEARCH") + field + ( range_query | term_query | fuzzy_query | match_query | vertical_bounds | time_bounds | time_query | geo_query) + CaselessLiteral("FROM") + index_name # Add the field to the frame object search_query.setParseAction( lambda x: self.frame.update({'field': x[1]})) collection_query = CaselessLiteral("IN") + collection_id collection_query.setParseAction( lambda x: self.frame.update({'collection': x[1]})) association_query = CaselessLiteral("BELONGS") + CaselessLiteral( "TO") + resource_id + Optional(depth_parameter) # Add the association to the frame object association_query.setParseAction( lambda x: self.frame.update({'association': x[2]})) owner_query = CaselessLiteral("HAS") + resource_id + Optional( depth_parameter) owner_query.setParseAction( lambda x: self.frame.update({'owner': x[1]})) query = search_query | association_query | collection_query | owner_query #-------------------------------------------------------------------------------------- # <primary-query> ::= <query> [<query-filter>] # <atom> ::= <query> # <intersection> ::= "AND" <atom> # <union> ::= "OR" <atom> # <sentence> ::= <primary-query> (<intersection> | <union>)* [<query-parameter>]* #-------------------------------------------------------------------------------------- primary_query = query + Optional(query_filter) # Set the primary query on the json_query to the frame and clear the frame primary_query.setParseAction(lambda x: self.push_frame()) atom = query intersection = CaselessLiteral("AND") + atom # Add an AND operation to the json_query and clear the frame intersection.setParseAction(lambda x: self.and_frame()) union = CaselessLiteral("OR") + atom # Add an OR operation to the json_query and clear the frame union.setParseAction(lambda x: self.or_frame()) self.sentence = primary_query + (intersection ^ union) * ( 0, None) + query_parameter * (0, None)
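# Hedged sketch: the parse actions above depend on a frame-accumulation
# protocol (self.frame plus push_frame/and_frame/or_frame and the *_frame
# wrappers) defined elsewhere in the original class. A minimal set of
# methods consistent with that usage might look like this (assumed, not
# the original implementation; self.frame would start as {} in __init__):
def push_frame(self):
    """Move the accumulated frame into the main query and start a new one."""
    self.json_query['query'] = self.frame
    self.frame = {}

def and_frame(self):
    """Record the accumulated frame as an AND clause."""
    self.json_query['and'].append(self.frame)
    self.frame = {}

def or_frame(self):
    """Record the accumulated frame as an OR clause."""
    self.json_query['or'].append(self.frame)
    self.frame = {}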
def evaluator(variables, functions, string, cs=False): """ Evaluate an expression. Variables are passed as a dictionary from string to value. Unary functions are passed as a dictionary from string to function. Variables must be floats. cs: Case sensitive """ all_variables = copy.copy(DEFAULT_VARIABLES) all_functions = copy.copy(DEFAULT_FUNCTIONS) all_variables.update(variables) all_functions.update(functions) if not cs: string_cs = string.lower() all_functions = lower_dict(all_functions) all_variables = lower_dict(all_variables) CasedLiteral = CaselessLiteral else: string_cs = string CasedLiteral = Literal check_variables(string_cs, set(all_variables) | set(all_functions)) # union of key sets; keys() + keys() breaks on Python 3 if string.strip() == "": return float('nan') # SI suffixes and percent number_suffix = MatchFirst([Literal(k) for k in SUFFIXES.keys()]) plus_minus = Literal('+') | Literal('-') times_div = Literal('*') | Literal('/') number_part = Word(nums) # 0.33 or 7 or .34 or 16. inner_number = (number_part + Optional("." + Optional(number_part))) | ( "." + number_part) # by default pyparsing allows spaces between tokens--Combine prevents that inner_number = Combine(inner_number) # 0.33k or -17 number = ( inner_number + Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part) + Optional(number_suffix)) number.setParseAction(number_parse_action) # Convert to number # Predefine recursive variables expr = Forward() # Handle variables passed in. # E.g. if we have {'R':0.5}, we make the substitution. # We sort the list so that var names (like "e2") match before # mathematical constants (like "e"). This is kind of a hack. all_variables_keys = sorted(all_variables.keys(), key=len, reverse=True) varnames = MatchFirst([CasedLiteral(k) for k in all_variables_keys]) varnames.setParseAction(lambda x: [all_variables[k] for k in x]) # if all_variables were empty, then pyparsing wants # varnames = NoMatch() # this is not the case, as all_variables contains the defaults # Same thing for functions. all_functions_keys = sorted(all_functions.keys(), key=len, reverse=True) funcnames = MatchFirst([CasedLiteral(k) for k in all_functions_keys]) function = funcnames + Suppress("(") + expr + Suppress(")") function.setParseAction(lambda x: [all_functions[x[0]](x[1])]) atom = number | function | varnames | Suppress("(") + expr + Suppress(")") # Do the following in the correct order to preserve order of operation pow_term = atom + ZeroOrMore(Suppress("^") + atom) pow_term.setParseAction(exp_parse_action) # 7^6 par_term = pow_term + ZeroOrMore(Suppress('||') + pow_term) # 5k || 4k par_term.setParseAction(parallel) prod_term = par_term + ZeroOrMore(times_div + par_term) # 7 * 5 / 4 - 3 prod_term.setParseAction(prod_parse_action) sum_term = Optional(plus_minus) + prod_term + ZeroOrMore( plus_minus + prod_term) # -5 + 4 - 3 sum_term.setParseAction(sum_parse_action) expr << sum_term # finish the recursion return (expr + stringEnd).parseString(string)[0]
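# Hedged usage sketch for evaluator(). Outputs assume DEFAULT_VARIABLES and
# DEFAULT_FUNCTIONS supply the usual constants and unary functions (e, pi,
# sin, ...) and SUFFIXES maps 'k' to 1e3; expected values, not verified:
# evaluator({'R': 0.5}, {}, 'R*2')   # -> 1.0
# evaluator({}, {}, '2k || 2k')      # -> 1000.0; '||' is parallel resistance
# evaluator({}, {}, 'sin(0) + 1')    # -> 1.0
# Matching is case-insensitive unless cs=True is passed.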
FLOAT_CONSTANT = Regex(r'-?\d*\.\d+') CONSTANT = STRING_CONSTANT | FLOAT_CONSTANT | INT_CONSTANT # All constants should be interpreted in Python format CONSTANT.setParseAction(lambda toks: ast.literal_eval(toks[0])) COMMA = Literal(',') LIST = Literal('[') + CONSTANT + ZeroOrMore(COMMA + CONSTANT) + Optional( COMMA) + Literal(']') IDENTIFIER = MatchFirst( [Word(alphas, alphanums), QuotedString('[', endQuoteChar=']')]) IDENTIFIER.setParseAction(lambda toks: Where(toks[0])) VALUE = CONSTANT | IDENTIFIER OPERATOR = MatchFirst([ Regex(r'==?'), Literal('!='), Literal('<>'), Literal('>='), Literal('<='), Literal('>'), Literal('<'), CaselessKeyword('like'), ])
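# Hedged sketch: a single comparison term composed from the pieces above.
# COMPARISON is a hypothetical name (not in the original); Where and the
# earlier STRING_CONSTANT/INT_CONSTANT definitions are assumed in scope.
COMPARISON = IDENTIFIER + OPERATOR + VALUE
# e.g. COMPARISON.parseString('age >= 21') should yield [Where('age'), '>=', 21],
# since CONSTANT's parse action runs ast.literal_eval on the matched token
# and MatchFirst tries the longer operators ('>=', '<=') before '>' and '<'.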
def _build_field_expr(field_names): field = MatchFirst( [CaselessKeyword(field_name) for field_name in field_names]) field.setParseAction(lambda x: Field(x[0])) return field
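# Hedged usage sketch: Field is defined elsewhere in the original source; a
# minimal stand-in is assumed here to show the parse action's effect.
class Field:
    def __init__(self, name):
        self.name = name

sort_field = _build_field_expr(['created', 'updated', 'name'])
# CaselessKeyword matches any casing but returns the keyword as defined,
# so the field name comes back normalized:
assert sort_field.parseString('UPDATED')[0].name == 'updated'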
# // BATTLE VERBS RUN_AWAY = oneOf(['run', 'run away', 'flee'], True, False) ENGAGE = oneOf(['attack', 'pounce', 'fight', 'engage'], True, False) BLOCK = oneOf(['dodge', 'parry', 'block'], True, False) def set_target(t): r = [{'target': [x for x in t]}] print(r) return r #// NORMALIZE OUR DIRECTIONS NORTH.setParseAction(lambda: [ { 'direction': 'NORTH', 'key': 0 }, ]) EAST.setParseAction(lambda: [ { 'direction': 'EAST', 'key': 1 }, ]) SOUTH.setParseAction(lambda: [ { 'direction': 'SOUTH', 'key': 2 }, ]) WEST.setParseAction(lambda: [ { 'direction': 'WEST', 'key': 3 }, ])
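# Hedged sketch (NORTH/EAST/SOUTH/WEST are defined earlier in the original
# source, presumably as caseless literals or keywords): with the parse
# actions above, a combined direction expression yields the normalized dict
# rather than the raw word. DIRECTION is a hypothetical name.
DIRECTION = NORTH | EAST | SOUTH | WEST
# e.g. DIRECTION.parseString('east')[0] -> {'direction': 'EAST', 'key': 1}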
def __init__(self): self.json_query = {"query": {}, "and": [], "or": []} self.tokens = None # -------------------------------------------------------------------------------------- # <integer> ::= 0-9 # <double> ::= 0-9 ('.' 0-9) # <number> ::= <integer> | <double> # -------------------------------------------------------------------------------------- integer = Regex(r"-?[0-9]+") # Word matches space for some reason double = Regex(r"-?[0-9]+\.?[0-9]*") number = double | integer # -------------------------------------------------------------------------------------- # <python-string> ::= (String surrounded by double-quotes) # <wildcard-string> ::= <python-string> # <limited-string> ::= '"' a..z A..Z 0..9 _ . '"' (alpha nums and ._ surrounded by double quotes) # <field> ::= <limited-string> | "*" # <coords> ::= "LAT" <number> "LON" <number> # <units> ::= ('km' | 'mi' | 'nm') # <distance> ::= REGEX(([0-9]*\.?[0-9]*)(km|mi|nm)?) # -------------------------------------------------------------------------------------- python_string = quotedString.setParseAction(removeQuotes) wildcard_string = python_string limited_string = Regex(r'("(?:[a-zA-Z0-9_\.])*"|\'(?:[a-zA-Z0-9_\.]*)\')').setParseAction(removeQuotes) field = limited_string ^ CaselessLiteral('"*"').setParseAction(removeQuotes) coords = CaselessLiteral("LAT") + number + CaselessLiteral("LON") + number units = CaselessLiteral("km") | CaselessLiteral("nm") | CaselessLiteral("mi") distance = number + units distance.setParseAction(lambda x: self.frame.update({"dist": float(x[0]), "units": x[1]})) # -------------------------------------------------------------------------------------- # <query-filter> ::= "FILTER" <python-string> # <index-name> ::= <python-string> # <resource-id> ::= '"' a..z A..Z 0..9 '"' (alpha nums surrounded by double quotes) # <collection-id> ::= <resource-id> # -------------------------------------------------------------------------------------- query_filter = CaselessLiteral("FILTER") + python_string # Add the filter to the frame object query_filter.setParseAction(lambda x: self.frame.update({"filter": x[1]})) index_name = MatchFirst(python_string) # Add the index to the frame object index_name.setParseAction(lambda x: self.frame.update({"index": x[0]})) resource_id = Regex(r'("(?:[a-zA-Z0-9])*"|\'(?:[a-zA-Z0-9]*)\')').setParseAction(removeQuotes) collection_id = resource_id # -------------------------------------------------------------------------------------- # <range-query> ::= "VALUES FROM" <number> "TO" <number> # -------------------------------------------------------------------------------------- range_query = CaselessLiteral("VALUES") + CaselessLiteral("FROM") + number + CaselessLiteral("TO") + number # Add the range to the frame object range_query.setParseAction(lambda x: self.frame.update({"range": {"from": float(x[2]), "to": float(x[4])}})) # -------------------------------------------------------------------------------------- # <geo-distance> ::= "DISTANCE" <distance> "FROM" <coords> # <geo-bbox> ::= "BOX" "TOP-LEFT" <coords> "BOTTOM-RIGHT" <coords> # -------------------------------------------------------------------------------------- geo_distance = CaselessLiteral("DISTANCE") + distance + CaselessLiteral("FROM") + coords geo_distance.setParseAction(lambda x: self.frame.update({"lat": float(x[5]), "lon": float(x[7])})) geo_bbox = ( CaselessLiteral("BOX") + CaselessLiteral("TOP-LEFT") + coords + CaselessLiteral("BOTTOM-RIGHT") + coords ) geo_bbox.setParseAction( lambda x: self.frame.update(
{"top_left": [float(x[5]), float(x[3])], "bottom_right": [float(x[10]), float(x[8])]} ) ) # -------------------------------------------------------------------------------------- # <field-query> ::= <wildcard-string> # <term-query> ::= "IS" <field-query> # <geo-query> ::= "GEO" ( <geo-distance> | <geo-bbox> ) # -------------------------------------------------------------------------------------- field_query = wildcard_string term_query = CaselessLiteral("IS") + field_query # Add the term to the frame object term_query.setParseAction(lambda x: self.frame.update({"value": x[1]})) geo_query = CaselessLiteral("GEO") + (geo_distance | geo_bbox) # -------------------------------------------------------------------------------------- # <limit-parameter> ::= "LIMIT" <integer> # <depth-parameter> ::= "DEPTH" <integer> # <order-parameter> ::= "ORDER" "BY" <limited-string> # <offset-parameter> ::= "SKIP" <integer> # <query-parameter> ::= <limit-parameter> | <order-parameter> | <offset-parameter> # -------------------------------------------------------------------------------------- limit_parameter = CaselessLiteral("LIMIT") + integer limit_parameter.setParseAction(lambda x: self.frame.update({"limit": int(x[1])})) depth_parameter = CaselessLiteral("DEPTH") + integer depth_parameter.setParseAction(lambda x: self.frame.update({"depth": int(x[1])})) order_parameter = CaselessLiteral("ORDER") + CaselessLiteral("BY") + limited_string order_parameter.setParseAction(lambda x: self.frame.update({"order": {x[2]: "asc"}})) offset_parameter = CaselessLiteral("SKIP") + integer offset_parameter.setParseAction(lambda x: self.frame.update({"offset": int(x[1])})) query_parameter = limit_parameter | order_parameter | offset_parameter # -------------------------------------------------------------------------------------- # <search-query> ::= "SEARCH" <field> (<range-query> | <term-query> | <geo-query>) "FROM" <index-name> [<query-parameter>]* # <collection-query> ::= "IN" <collection-id> # <association-query> ::= "BELONGS TO" <resource-id> [ <depth-parameter> ] # <query> ::= <search-query> | <association-query> | <collection-query> # -------------------------------------------------------------------------------------- search_query = ( CaselessLiteral("SEARCH") + field + (range_query | term_query | geo_query) + CaselessLiteral("FROM") + index_name + query_parameter * (0, None) ) # Add the field to the frame object search_query.setParseAction(lambda x: self.frame.update({"field": x[1]})) collection_query = CaselessLiteral("IN") + collection_id collection_query.setParseAction(lambda x: self.frame.update({"collection": x[1]})) association_query = CaselessLiteral("BELONGS") + CaselessLiteral("TO") + resource_id + Optional(depth_parameter) # Add the association to the frame object association_query.setParseAction(lambda x: self.frame.update({"association": x[2]})) query = search_query | association_query | collection_query # -------------------------------------------------------------------------------------- # <primary-query> ::= <query> [<query-filter>] # <atom> ::= <query> # <intersection> ::= "AND" <atom> # <union> ::= "OR" <atom> # <sentence> ::= <primary-query> (<intersection> | <union>)* # -------------------------------------------------------------------------------------- primary_query = query + Optional(query_filter) # Set the primary query on the json_query to the frame and clear the frame primary_query.setParseAction(lambda x: self.push_frame()) atom = query intersection = CaselessLiteral("AND") + atom # Add an AND operation to the
json_query and clear the frame intersection.setParseAction(lambda x: self.and_frame()) union = CaselessLiteral("OR") + atom # Add an OR operation to the json_query and clear the frame union.setParseAction(lambda x: self.or_frame()) self.sentence = primary_query + (intersection ^ union) * (0, None)
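# Hedged usage sketch for the grammar above (the enclosing class name is
# assumed; push_frame/and_frame/or_frame follow the frame protocol sketched
# after the first variant of this parser):
# parser = QueryLanguage()          # hypothetical class name
# parser.sentence.parseString('SEARCH "depth" VALUES FROM 10 TO 20 FROM "idx"')
# parser.json_query                 # -> {'query': {...}, 'and': [], 'or': []}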