class SDKConfig:
    """
    Encapsulates an sdkconfig file. Defines grammar of a configuration entry, and enables
    evaluation of logical expressions involving those entries.
    """

    # A configuration entry is in the form CONFIG=VALUE. Definitions of components of that grammar
    IDENTIFIER = Word(alphanums.upper() + "_")

    # Numeric values are converted to int by their parse actions.
    HEX = Combine("0x" + Word(hexnums)).setParseAction(lambda t:int(t[0], 16))
    DECIMAL = Combine(Optional(Literal("+") | Literal("-")) + Word(nums)).setParseAction(lambda t:int(t[0]))
    LITERAL = Word(printables.replace(":", ""))
    QUOTED_LITERAL = quotedString.setParseAction(removeQuotes)

    # Alternatives are tried left to right.
    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Operators supported by the expression evaluation
    OPERATOR = oneOf(["=", "!=", ">", "<", "<=", ">="])

    def __init__(self, kconfig_file, sdkconfig_file, env=None):
        """
        Export the given environment entries, then load the Kconfig tree and the sdkconfig values.

        :param kconfig_file: path passed to kconfiglib.Kconfig
        :param sdkconfig_file: path passed to Kconfig.load_config
        :param env: optional iterable of "NAME=VALUE" strings written to os.environ before
            the Kconfig file is loaded; None (the default) means no entries
        :raises ValueError: if an entry of env contains no "="
        """
        # None replaces the former mutable list default; behaviour for callers is unchanged.
        entries = () if env is None else env

        # Split everything first so that a malformed entry fails before os.environ is touched.
        pairs = [(name, value) for (name, value) in (e.split("=", 1) for e in entries)]

        for name, value in pairs:
            # Collapse any run of whitespace in the value to a single space.
            value = " ".join(value.split())
            os.environ[name] = value

        self.config = kconfiglib.Kconfig(kconfig_file)
        self.config.load_config(sdkconfig_file)

    def evaluate_expression(self, expression):
        """
        Evaluate a Kconfig expression string against the loaded configuration.

        :return: False when the result is 0 (n), True when it is 2 (y)
        :raises Exception: for any other result (m)
        """
        result = self.config.eval_string(expression)

        if result == 0:  # n
            return False
        elif result == 2:  # y
            return True
        else:  # m
            raise Exception("unsupported config expression result")

    @staticmethod
    def get_expression_grammar():
        """
        Build the pyparsing grammar for conditional expressions.

        A condition is either IDENTIFIER OPERATOR VALUE or a bare IDENTIFIER, optionally
        wrapped in parentheses; conditions combine with "!", "&&" and "||".
        """
        identifier = SDKConfig.IDENTIFIER.setResultsName("identifier")
        operator = SDKConfig.OPERATOR.setResultsName("operator")
        value = SDKConfig.VALUE.setResultsName("value")

        test_binary = identifier + operator + value
        test_single = identifier

        # The binary form must be tried first, otherwise the bare identifier would always win.
        test = test_binary | test_single

        condition = Group(Optional("(").suppress() + test + Optional(")").suppress())

        grammar = infixNotation(condition, [
                                ("!", 1, opAssoc.RIGHT),
                                ("&&", 2, opAssoc.LEFT),
                                ("||", 2, opAssoc.LEFT)])

        return grammar
def _parse_cut_ami(text):
    """Parse "cut ami" command using pyparsing.

    Accepted form::

        cut ami [verbose] [noop] for <env>-<dep>-<cluster> from <env>-<dep>-<cluster>
            [with key=value ...] [using ami-<id>]

    Args:
        text: the complete command string.

    Returns:
        dict with the keys ``dest_env``, ``dest_dep``, ``dest_play``,
        ``source_env``, ``source_dep``, ``source_play``, ``base_ami``
        (``None`` without a ``using`` clause), ``version_overrides``
        (``None`` without a ``with`` clause, else a key -> value dict),
        ``verbose`` and ``noop`` (booleans).

    Raises:
        pyparsing.ParseException: raised by ``parseString`` when ``text``
            does not match the grammar.
    """
    # Word == single token
    edctoken = Word(alphanums + '_')
    withtoken = Word(printables.replace('=', ''))

    preamble = Suppress(Literal('cut') + 'ami')

    # e.g. prod-edx-exdapp. Combining into 1 token enforces lack of whitespace
    e_d_c = Combine(
        edctoken('environment') + '-' + edctoken('deployment') + '-' +
        edctoken('cluster'))

    # e.g. cut ami for prod-edx-edxapp. Subsequent string literals are
    # converted when added to a pyparsing object.
    for_from = Suppress('for') + e_d_c('for_edc') + Suppress(
        'from') + e_d_c('from_edc')

    # e.g. with foo=bar bing=baz.
    # Group puts the k=v pairs in sublists instead of flattening them to the
    # top-level token list.
    with_stmt = Suppress('with')
    with_stmt += OneOrMore(
        Group(withtoken('key') + Suppress('=') +
              withtoken('value')))('overrides')

    # e.g. using ami-deadbeef or using ami-0123456789abcdef0.
    # EC2 ids are either the legacy 8 hex digits or the current 17; the longer
    # alternative is listed first so a 17-digit id is not cut off after 8.
    using_stmt = Suppress('using') + Regex(
        'ami-(?:[0-9a-f]{17}|[0-9a-f]{8})')('ami_id')

    # 0-1 with and using clauses in any order (see Each())
    modifiers = Optional(with_stmt('with_stmt')) & Optional(
        using_stmt('using_stmt'))

    # 0-1 verbose and noop options in any order (as above)
    options = Optional(Literal('verbose')('verbose')) & Optional(
        Literal('noop')('noop'))

    pattern = StringStart(
    ) + preamble + options + for_from + modifiers + StringEnd()

    parsed = pattern.parseString(text)
    return {
        'dest_env': parsed.for_edc.environment,
        'dest_dep': parsed.for_edc.deployment,
        'dest_play': parsed.for_edc.cluster,
        'source_env': parsed.from_edc.environment,
        'source_dep': parsed.from_edc.deployment,
        'source_play': parsed.from_edc.cluster,
        'base_ami': parsed.using_stmt.ami_id if parsed.using_stmt else None,
        'version_overrides': {i.key: i.value
                              for i in parsed.with_stmt.overrides}
        if parsed.with_stmt else None,
        'verbose': bool(parsed.verbose),
        'noop': bool(parsed.noop),
    }
def parser(self):
    """
    Build the search-query grammar and return its ``parseString`` method.

    The grammar should be like most full text search engines (Google, Tsearch, Lucene).

    Grammar:
    - a query consists of words; a word may carry a '*' wildcard at its start,
      its end, or both
    - a sequence of words between quotes is a literal string
    - words can be used together by using operators ('and' or 'or')
    - words with operators can be grouped with parenthesis
    - a word or group of words can be preceded by a 'not' operator
    - the 'and' operator precedes an 'or' operator
    - if an operator is missing, use an 'and' operator

    Returns:
        the bound ``parseString`` method of the top-level (``or``) expression,
        i.e. a callable taking the query string.
    """
    # Top-level rule; declared first because parentheses recurse into it.
    operatorOr = Forward()

    # Earlier, commented-out variants of operatorWord:
    ## operatorWord = Group(Combine(Word(alphanums) + Suppress('*'))).setResultsName('wordwildcard') | \
    ##                    Group(Word(alphanums)).setResultsName('word')

    # A word may contain any printable character except the ones that have a
    # syntactic role in the grammar: double quote, '*', '(' and ')'.
    characters = printables.replace('"','').replace('*','').replace('(','').replace(')','')
    ##operatorWord = Group(Combine(Word(characters) + Suppress('*'))).setResultsName('wordwildcard') | \
    ##                    Group(Word(characters)).setResultsName('word') # For including alphanums and punctuation

    # Alternatives are tried in order, so the '*word*' form must come before
    # the single-sided wildcard forms and the plain word.
    operatorWord = Group(Combine(Suppress('*') + Word(characters) + Suppress('*'))).setResultsName('infixwordwildcard') | \
                   Group(Combine(Word(characters) + Suppress('*'))).setResultsName('suffixwordwildcard') | \
                   Group(Combine(Suppress('*') + Word(characters))).setResultsName('prefixwordwildcard') | \
                   Group(Word(characters)).setResultsName('word') # For including alphanums and punctuation

    # One or more words (right-recursive) inside a quoted string.
    operatorQuotesContent = Forward()
    operatorQuotesContent << (
        (operatorWord + operatorQuotesContent) | operatorWord
    )

    # "..." literal, or fall through to a single word.
    operatorQuotes = Group(
        Suppress('"') + operatorQuotesContent + Suppress('"')
    ).setResultsName("quotes") | operatorWord

    # ( expression ), or fall through to quotes/word.
    operatorParenthesis = Group(
        (Suppress("(") + operatorOr + Suppress(")"))
    ).setResultsName("parenthesis") | operatorQuotes

    # 'not' binds tighter than 'and'/'or' and may be repeated.
    operatorNot = Forward()
    operatorNot << (Group(
        Suppress(Keyword("not", caseless=True)) + operatorNot
    ).setResultsName("not") | operatorParenthesis)

    # Explicit 'and', or implicit 'and' when two terms are adjacent; the
    # negative lookahead keeps the keywords themselves from being read as terms.
    operatorAnd = Forward()
    operatorAnd << (Group(
        operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd
    ).setResultsName("and") | Group(
        operatorNot + OneOrMore(~oneOf("and or") + operatorAnd)
    ).setResultsName("and") | operatorNot)

    # 'or' has the lowest precedence.
    operatorOr << (Group(
        operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
    ).setResultsName("or") | operatorAnd)

    return operatorOr.parseString
def __init__(self, scr, interp, statusbar=None, idle=None):
    """Initialise the repl state.

    The repl is still tied to a curses screen that is passed in here; that
    coupling (and the 'idle' hook) ideally belongs in a separate class.

    scr is the curses screen.

    interp is a Python code.InteractiveInterpreter instance.

    statusbar is stored as-is on the instance.

    The optional 'idle' parameter is a function that the repl calls while
    it's blocking (waiting for keypresses).
    """
    self.cut_buffer = ''
    self.buffer = []
    self.scr = scr
    self.interp = interp
    self.match = False
    self.rl_hist = []
    self.stdout_hist = ''
    self.s_hist = []
    self.history = []
    self.h_i = 0
    self.in_hist = False
    self.evaluating = False
    self.do_exit = False
    self.cpos = 0
    # Use the interpreter's namespace only for the readline stuff:
    self.completer = rlcompleter.Completer(self.interp.locals)
    self.statusbar = statusbar
    self.list_win = curses.newwin(1, 1, 1, 1)
    self.idle = idle
    self.f_string = ''
    self.matches = []
    self.argspec = None
    self.s = ''
    self.list_win_visible = False
    self._C = {}
    sys.stdin = FakeStdin(self)

    # Everything below (history file and the parenthesis parser) is only set
    # up when the arg_spec option is enabled.
    if not OPTS.arg_spec:
        return

    pythonhist = os.path.expanduser('~/.pythonhist')
    if os.path.exists(pythonhist):
        # Close the history file deterministically instead of leaking the
        # handle until garbage collection.
        with open(pythonhist, 'r') as hist_file:
            self.rl_hist = hist_file.readlines()

    # Grammar for (possibly unterminated) nested parenthesised expressions:
    # the closing ")" is optional so that a partially typed call still parses.
    pexp = Forward()
    chars = printables.replace('(', '')
    chars = chars.replace(')', '')
    pexpnest = (
        Optional(Word(chars)) +
        Literal("(") +
        Optional(Group(pexp)) +
        Optional(Literal(")")))
    pexp << (OneOrMore(Word(chars) | pexpnest))
    self.pparser = pexp
class SDKConfig:
    """
    Evaluates conditional expressions based on the build's sdkconfig and Kconfig files.
    This also defines the grammar of conditional expressions.
    """

    # Building blocks of a CONFIG=VALUE entry.
    IDENTIFIER = Word(alphanums.upper() + '_')

    # Numeric tokens are turned into int by their parse actions.
    HEX = Combine('0x' + Word(hexnums)).setParseAction(lambda t: int(t[0], 16))
    DECIMAL = Combine(Optional(Literal('+') | Literal('-')) + Word(nums)).setParseAction(lambda t: int(t[0]))
    LITERAL = Word(printables.replace(':', ''))
    QUOTED_LITERAL = quotedString.setParseAction(removeQuotes)

    VALUE = HEX | DECIMAL | LITERAL | QUOTED_LITERAL

    # Comparison operators understood by the expression grammar.
    OPERATOR = oneOf(['=', '!=', '>', '<', '<=', '>='])

    def __init__(self, kconfig_file, sdkconfig_file):
        """Load the Kconfig tree, then apply the values from the sdkconfig file."""
        self.config = kconfiglib.Kconfig(kconfig_file)
        self.config.load_config(sdkconfig_file)

    def evaluate_expression(self, expression):
        """
        Evaluate ``expression`` against the loaded configuration.

        Returns True for a result of 2 (y) and False for 0 (n); any other
        result (m) raises Exception.
        """
        outcome = self.config.eval_string(expression)

        if outcome == 0:  # n
            return False
        if outcome == 2:  # y
            return True
        # m
        raise Exception('unsupported config expression result')

    @staticmethod
    def get_expression_grammar():
        """
        Return the pyparsing grammar for conditional expressions: conditions
        (``IDENTIFIER OPERATOR VALUE`` or a bare ``IDENTIFIER``, optionally
        parenthesised) combined with ``!``, ``&&`` and ``||``.
        """
        name_expr = SDKConfig.IDENTIFIER.setResultsName('identifier')
        op_expr = SDKConfig.OPERATOR.setResultsName('operator')
        value_expr = SDKConfig.VALUE.setResultsName('value')

        # Try the comparison form before falling back to the bare identifier.
        comparison = name_expr + op_expr + value_expr
        single_test = comparison | name_expr

        open_paren = Optional('(').suppress()
        close_paren = Optional(')').suppress()
        condition = Group(open_paren + single_test + close_paren)

        operator_table = [
            ('!', 1, opAssoc.RIGHT),
            ('&&', 2, opAssoc.LEFT),
            ('||', 2, opAssoc.LEFT),
        ]
        return infixNotation(condition, operator_table)
def query_parser() -> ParserElement:
    """Build and return the search-query grammar.

    A query is a sequence of "full elements" joined by optional ``and``/``or``
    connectors. Each full element is an optional negator (``!``, ``-`` or
    ``not``) followed by one of: a quoted string, a bracketed sub-expression,
    a field (``@name value`` or ``name: value``), a word with an
    ``except``/``ignore`` clause, or a plain word.

    Returns:
        The top-level ``expression`` parser element.
    """
    # Creating the grammar
    # Plain words exclude the characters that have a syntactic role.
    valid_chars = printables.replace("(", "").replace(")", "").replace(
        ":", "").replace("\"", "")
    expr = Forward().setName("expression")

    quotes = QuotedString(
        '"', "\\").setName("quoted string").setResultsName("quotes")

    brackets = Group(Literal("(").suppress() + expr + Literal(")").suppress()) \
        .setName("bracketed expression").setResultsName("brackets")

    words = Word(valid_chars).setName("word").setResultsName("word")

    exception_elem = Group(quotes | words).setName("exception element")\
        .setResultsName("exception_element", listAllMatches=True)

    # Either a single element or a parenthesised list, optionally 'or'-separated.
    exception = Group(exception_elem | (
        Literal("(") + exception_elem
        + ZeroOrMore(pyparsing.Optional(CaselessKeyword("or")) + exception_elem)
        + Literal(")"))).setName("exception").setResultsName("exception")

    exception_connector = (CaselessKeyword("except") | CaselessKeyword("ignore"))\
        .setName("Except").setResultsName("except")

    # setName() renames the element it is called on and returns that same
    # element (only setResultsName() works on a copy). Naming a copy keeps the
    # shared `words` element reported as "word" in error messages instead of
    # being silently relabelled "exception word" everywhere it is used.
    exception_word = words.copy().setName("exception word").setResultsName(
        "exception_word")

    word_with_exception = Group(exception_word + exception_connector + exception) \
        .setName("word with exception").setResultsName("word_with_exception")

    word_with_exception_brackets = Literal(
        "(") + word_with_exception + Literal(")")

    # '@name' or 'name:' introduces a field.
    field_name = Group((Literal("@").suppress() + Word(valid_chars)) |
                       (Word(valid_chars) + Literal(":").suppress())) \
        .setName("field name").setResultsName("field_name")

    field_value = Group(quotes | word_with_exception_brackets | word_with_exception | words)\
        .setName("field value").setResultsName("field_value")

    field = Group(field_name + field_value).setName("field").setResultsName("field")

    negator = Group(pyparsing.Optional(Literal("!") | Literal("-") | CaselessKeyword("not"))) \
        .setName("negator").setResultsName("negator")

    # Order matters: more specific alternatives are tried before plain words.
    element = Group(quotes | brackets | field | word_with_exception | words) \
        .setName("element").setResultsName("element")

    full_element = Group(negator + element).setName("full element").setResultsName(
        "full_element", listAllMatches=True)

    connector = Group(pyparsing.Optional(CaselessKeyword("or") | CaselessKeyword("and"))) \
        .setName("connector").setResultsName("connector", listAllMatches=True)

    expr <<= full_element + ZeroOrMore(connector + full_element)

    return expr
def parse_column_name(string):
    """Return the column name found in one column expression of a select query.

    Note:
        The column name is assumed to be the last word of ``string``; an
        input containing no word at all is not handled here.

    Args:
        string(str): Input string to be parsed

    Returns:
        result(str): column name
    """
    # Scan the input for every run of allowed characters.
    word_chars = printables.replace('\n\r', '')
    matches = Word(word_chars).searchString(string)

    # The last match holds the column name.
    # TODO: Make it more complicated
    last_match = matches.pop()
    return last_match.asList().pop()
def parse_column_name(string):
    """Extract the column name from a select-query column expression.

    Note:
        Every column is assumed to have a name, and that name is taken to be
        the last word of ``string``. Inputs without any word are not handled.

    Args:
        string(str): Input string to be parsed

    Returns:
        result(str): column name
    """
    # Collect all words of the input, then keep only the tokens of the last one.
    scanner = Word(printables.replace("\n\r", ""))
    final_tokens = scanner.searchString(string).pop().asList()

    # TODO: Make it more complicated
    return final_tokens.pop()
Suppress(Literal("*")) + SkipTo(Literal("*")) + Suppress(Literal("*"))).setParseAction(lambda t: [["italic", t[0]]]) # Ex: **this** is bold bold_text = ( Suppress(Literal("**")) + SkipTo(Literal("**")) + Suppress(Literal("**"))).setParseAction(lambda t: [["bold", t[0]]]) bold_italic_text = ( Suppress(Literal("***")) + SkipTo(Literal("***")) + Suppress(Literal("***"))).setParseAction(lambda t: [["bold-italic", t[0]]]) link_text = ( Suppress(Literal("[")) + ( Optional(url) + ZeroOrMore(Word(printables.replace( "]", ""))).setParseAction(lambda t: "text") # SkipTo(Literal("]")) ) + Suppress( Literal("]") + Literal("(") + SkipTo(Literal(")")) + Literal(")"))).setParseAction(lambda t: [["link", t[0]]]) # Ex: this is regular reg_text = ( OneOrMore( NotAny(italic_text | link_text) + Word(printables) | Suppress(Literal("\n")) + Word(printables)) # SkipTo(Literal("*")) | SkipTo(Literal("\n\n")) ).setParseAction(lambda t: [["regular", " ".join(t)]]) # Note: can be either two newlines or the end of the string
# we need to reimplement all this junk from pyparsing because pcre's # idea of escapable characters contains a lot more than the C-like # thing pyparsing implements _bslash = "\\" _sglQuote = Literal("'") _dblQuote = Literal('"') _escapables = printables _escapedChar = Word(_bslash, _escapables, exact=2) dblQuotedString = Combine( _dblQuote + ZeroOrMore( CharsNotIn('\\"\n\r') | _escapedChar | '""' ) + _dblQuote ).streamline().setName("string enclosed in double quotes") sglQuotedString = Combine( _sglQuote + ZeroOrMore( CharsNotIn("\\'\n\r") | _escapedChar | "''" ) + _sglQuote ).streamline().setName("string enclosed in single quotes") quotedArg = ( dblQuotedString | sglQuotedString ) quotedArg.setParseAction(removeQuotes) quotedArg.setName("quotedArg") plainArgChars = printables.replace('#', '').replace('"', '').replace("'", "") plainArg = Word(plainArgChars) plainArg.setName("plainArg") arguments = Group(ZeroOrMore(quotedArg | plainArg)) arguments = arguments.setResultsName('arguments') arguments.setName("arguments") # comment line. comment = Literal('#') + restOfLine comment = comment.suppress() comment.setName('comment') full_command = ( comment | (command + arguments + Optional(comment))
def _create_field_parser(self):
    """Create ``self.field``: a word of printable characters that excludes
    ``COMMENT_START``, then register it through ``self._config`` under the
    name 'field' with a callback that returns None."""
    allowed_chars = printables.replace(COMMENT_START, '')
    self.field = Word(allowed_chars)
    self._config(self.field, 'field', lambda: None)
hexadectet = Word(hexnums, min=1, max=4) ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet) ipv6_shortened_word_start = copy.deepcopy(alphanum_word_start) # the condition on the end of this grammar is designed to make sure that any shortened ipv6 addresses have '::' in them ipv6_address_shortened = Combine( OneOrMore(Or([hexadectet + Word(':'), Word(':')])) + hexadectet).addCondition(lambda tokens: tokens[0].count('::') > 0) ipv6_address = (Or([ipv6_address_full, ipv6_address_shortened ]).addCondition(lambda tokens: tokens[0].count(':') > 1) + alphanum_word_end) complete_email_comment = Combine('(' + Word(printables.replace(')', '')) + ')') # the complete_email_local_part grammar ignores the fact that characters like <<<(),:;<>@[\] >>> are possible in a quoted complete_email_local_part (and the double-quotes and backslash should be preceded by a backslash) complete_email_local_part = Combine( Optional(complete_email_comment)('email_address_comment') + Word(alphanums + "!#$%&'*+-/=?^_`{|}~." + '"') + Optional(complete_email_comment)('email_address_comment')) complete_email_address = Combine( complete_email_local_part('email_address_local_part') + "@" + Or([domain_name, '[' + ipv4_address + ']', '[IPv6:' + ipv6_address + ']'])('email_address_domain')) email_local_part = Word(alphanums, bodyChars=alphanums + "+-_.").setParseAction(downcaseTokens) email_address = alphanum_word_start + Combine( email_local_part('email_address_local_part') + "@" + Or([domain_name, '[' + ipv4_address + ']', '[IPv6:' + ipv6_address + ']'])
class BigSmilesPattern(SmilesPattern):
    """pyparsing patterns for BigSMILES strings, built on top of SmilesPattern.

    The class body defines, in order: bonding descriptors, augmented-SMILES
    elements (SMILES characters plus bracketed bonding descriptors), stochastic
    objects delimited by curly braces, the repeat-unit / end-group lists inside
    a stochastic object, and the top-level BigSMILES elements.
    """

    #### DEFINITIONS of patterns involved in BigSMILES_Bond ####
    _BigSmilesBondChar = "$<>"
    # One bond-type character, optionally followed by an id: a single digit, or
    # '%' followed by exactly two digits.
    _BondDesc = Word(_BigSmilesBondChar,exact=1).setResultsName('BigSMILES_Bondtype') + \
                ( (Word(nums,exact=1).setResultsName('BigSMILES_Bondid') | \
                   Literal('%')+Word(nums,exact=2).setResultsName('BigSMILES_Bondid') ) )*(0,1)
    # Earlier ladder-bond draft, kept disabled:
#    _ladderBondDesc = Word(_bigsmilesBondChar,exact=1).setResultsName('BigSMILES_outerBondtype') + \
#                      '[' + _bondDesc + ']' + \
#                      (Word(nums,exact=1).setResultsName('BigSMILES_outerbondid') | \
#                       Literal('%')+Word(nums,exact=2).setResultsName('BigSMILES_outerbondid') )
#    _bigsmilesBond = _ladderBondDesc.setResultsName('BigSMILES_ladderBond') | _bondDesc.setResultsName('BigSMILES_Bond')
    # A bonding descriptor enclosed in square brackets.
    _BigSmilesBond = (Literal('[') + _BondDesc.setResultsName('BigSMILES_Bond') + Literal(']') )

    #### DEFINITIONS of patterns involved in Augmented_SMILES ####
    # redefinition for the elements used in parsing of Augmented SMILES strings
    _AugmentedSmilesChar = SmilesPattern._smilesChar | _BigSmilesBond
    _AugmentedBranchContent = _AugmentedSmilesChar*(1,None)
    # Join the matched pieces back into one string.
    _AugmentedBranchContent.setParseAction(lambda toks: ''.join(toks))
    _AugmentedBranch = nestedExpr('(',')',content=_AugmentedBranchContent)
    #_AugmentedBranch.setParseAction(lambda toks: '('+''.join([str(item) for sublist in toks for item in sublist])+')')
    # Flatten the nested result and restore the surrounding parentheses.
    _AugmentedBranch.setParseAction(lambda toks: '('+''.join(flatten_list(toks,str))+')')
    # _AugmentedSmilesElement explicitly used in Augmented_SMILES()
    _AugmentedSmilesElement = _AugmentedSmilesChar | _AugmentedBranch.setResultsName('branch')
    _AugmentedSmilesElement.addParseAction(SmilesPattern.addRawStr)

    #### DEFINITIONS of stochastic object
    _StoObjSepChar = ",;"
    #_BracketedBond = (Literal('[') + _BigSmilesBond + Literal(']')).setResultsName('BigSMILES_bracketedBond')
    # Square brackets around an optional bonding descriptor ('[]' is allowed).
    _TerminalBond = (Literal('[') + (_BondDesc*(0,1)).setResultsName('BigSMILES_Bond') + Literal(']')).setResultsName('BigSMILES_terminalBond')
    # A terminal bond anchored at the start / at the end of the parsed string.
    _opener = StringStart() + _TerminalBond
    _opener.setParseAction(SmilesPattern.addRawStr)
    _closer = _TerminalBond + StringEnd()
    _closer.setParseAction(SmilesPattern.addRawStr)
    _StoObjSep = Word(',;',exact=1)

    # Content of a stochastic object: any printable characters except braces.
    printableExceptCurly = printables.replace('{', '').replace('}', '')
    _StoObjContent = Word(printableExceptCurly)#.setResultsName('StoObjCont')
    _StoObjContent.setParseAction(lambda toks: ''.join(toks))
    _StoObj = nestedExpr('{','}',content=_StoObjContent)
    # Flatten the nested result and restore the surrounding braces.
    _StoObj.setParseAction(lambda toks: '{'+''.join(flatten_list(toks,str))+'}')
    #_StoObjDummy = (Literal('{') + Word(nums).setResultsName('StoObjId') + Literal('}')).setResultsName('BigSMILES_StoObj')

    def separateList(toks):
        # Parse action for _StoObjLists. Drops the ',' separators, then stores
        # the units before ';' under 'repUnit' and the units after it under
        # 'endGrp' (an empty list when no end-group part was matched). The
        # concatenation of the tokens is stored under 'rawStr'.
        L = [x for x in toks if x != ',']
        if not 'endGrp' in toks.keys():
            toks['repUnit'] = L
            toks['endGrp'] = list()
        else:
            n = L.index(';')
            toks['repUnit'] = L[:n]
            toks['endGrp'] = L[n+1:]
        toks['rawStr'] = ''.join(toks)
        return toks

    # A unit is any run of printable characters other than ',' and ';'.
    printableExceptSemicolon = printables.replace(';', '')
    printableExceptCommaSemicolon = printableExceptSemicolon.replace(',', '')
    _StoObjUnit = Word(printableExceptCommaSemicolon)
    _StoObjList = _StoObjUnit + ("," + _StoObjUnit)*(0,None)
    # Comma-separated repeat units, optionally followed by ';' and end groups.
    _StoObjLists = _StoObjList.setResultsName('repUnit') + \
                   (Literal(';') + _StoObjList.setResultsName('endGrp'))*(0,1)
    _StoObjLists.setParseAction(separateList)

    #### DEFINITIONS of patterns involved in BigSMILES() ####
    _BigSmilesChar = SmilesPattern._smilesChar | _BigSmilesBond | _StoObj.setResultsName('BigSMILES_StoObj')
    _BigSmilesBranchContent = _BigSmilesChar*(1,None)
    _BigSmilesBranchContent.setParseAction(lambda toks: ''.join(toks))
    _BigSmilesBranch = nestedExpr('(',')',content=_BigSmilesBranchContent)
    _BigSmilesBranch.setParseAction(lambda toks: '('+''.join(flatten_list(toks,str))+')')
    _BigSmilesElement = _BigSmilesChar | _BigSmilesBranch.setResultsName('branch')
    _BigSmilesElement.addParseAction(SmilesPattern.addRawStr)

    # An older, fully commented-out draft used to follow here. It defined
    # _augBranchContent/_augBranch/_augSmilesElement, _bigsmileschainObj,
    # _bracketedBond and alternative _opener/_closer/_stoObj*/_bigsmilesElement
    # patterns; none of it was active code.

    # NOTE(review): this is a verbatim repeat of separateList defined earlier in
    # this class body; it rebinds the same name to an identical function.
    # Its original nesting level is presumed to be the class body - confirm.
    def separateList(toks):
        L = [x for x in toks if x != ',']
        if not 'endGrp' in toks.keys():
            toks['repUnit'] = L
            toks['endGrp'] = list()
        else:
            n = L.index(';')
            toks['repUnit'] = L[:n]
            toks['endGrp'] = L[n+1:]
        toks['rawStr'] = ''.join(toks)
        return toks
from price_parser import Price
from pyparsing import Suppress, alphanums, Word, nums, printables, Combine, Regex, Literal, Optional

# Basic tokens for parsing chat commands.

# Any run of non-whitespace printable characters.
_String = Word(printables)
# A run of digits, converted to int.
_Int = Word(nums).setParseAction(lambda s, loc, toks: int(toks[0]))
# <@U088EGWEL> --> U088EGWEL
_User = Combine(Suppress('<@') + Word(alphanums) + Suppress('>'))
# <#C052EM50K|waterloo> --> C052EM50K
# (everything between the id and the closing '>' is dropped)
_Channel = Combine(
    Suppress('<#') + Word(alphanums) +
    Word(printables.replace('>', '')).suppress() + Suppress('>'))
# <mailto:user@example.com|user@example.com> --> user@example.com
# (the address part only allows alphanumerics, '@' and '.')
_Email = Combine(
    Suppress('<mailto:') + Word(alphanums + '@.') +
    Word(printables.replace('>', '')).suppress() + Suppress('>'))
# :simple_smile: --> simple_smile
_Emoji = Combine(
    Suppress(':') + Word(printables.replace(':', '')) + Suppress(':'))
# A run of digits, '$' and '.', converted to a float through price_parser.
_Money = Regex(r'[0-9$\.]+').setParseAction(
    lambda s, loc, toks: Price.fromstring(toks[0]).amount_float)
# Whatever is left on the current line (may be empty).
_RestOfInput = Regex(r'.*')
_Direction = Literal('to') | Literal('from')
# Optional 'full' keyword.
_Full = Optional(Literal('full'))

# Command names accepted after '.help'.
_Commands = Literal('help') | Literal('ping') | Literal('pong') | Literal(
    'sw') | Literal('mmr') | Literal('record')

# Command patterns.
_help = Literal('.help')
# '.help <command>'; the command may carry its own leading '.', which is dropped.
_help_advanced = Literal('.help') + Optional('.').suppress() + _Commands
_ping = Literal('.ping')
_pong = Literal('.pong')
import Orange.misc
from Orange.core import \
    BasketFeeder, FileExampleGenerator, BasketExampleGenerator, \
    C45ExampleGenerator, TabDelimExampleGenerator, \
    registerFileType as register_file_type

import Orange.feature as variable
from Orange.feature import Descriptor
MakeStatus = Orange.feature.Descriptor.MakeStatus
make = Orange.feature.Descriptor.make

from pyparsing import (printables, originalTextFor, OneOrMore,
     quotedString, Word, delimitedList)

# Unquoted words can contain any printable character except a comma.
# (Despite its name, ``printables_no_colon`` has the comma removed, not the colon.)
printables_no_colon = printables.replace(',', '')
# One value: a run of quoted strings and/or comma-free words, kept as the original text.
content = originalTextFor(OneOrMore(quotedString | Word(printables_no_colon)))


def loadARFF(filename, create_on_new=MakeStatus.Incompatible, **kwargs):
    """Return class:`Orange.data.Table` containing data from file in Weka ARFF format
       if there exists no .xml file with the same name. If it does, a multi-label
       dataset is read and returned.

       A trailing ".arff" in ``filename`` is optional. ``kwargs`` is accepted
       but not used here.
    """
    # Work with the extension-less name from here on.
    stem = filename[:-5] if filename.endswith(".arff") else filename
    xml_name = stem + ".xml"
    arff_name = stem + ".arff"

    # Plain Weka ARFF unless both the .xml and the .arff file are present.
    if not (os.path.exists(xml_name) and os.path.exists(arff_name)):
        return loadARFF_Weka(stem, create_on_new)

    return Orange.multilabel.mulan.trans_mulan_data(xml_name, arff_name, create_on_new)
# A platform token: at least two letters with an optional trailing '/', which
# must not be "mozilla" (case-insensitive), optionally followed by a version.
user_agent_platform = Combine(
    alphanum_word_start
    + Regex('[a-zA-Z]{2,}/?').addCondition(
        lambda tokens: tokens[0].lower().strip('/') != 'mozilla')
    + Optional(user_agent_platform_version))
# The pieces need not be adjacent in the text; they are joined with single spaces.
user_agent = Combine(
    user_agent_start
    + user_agent_details
    + ZeroOrMore(user_agent_platform + Optional(user_agent_details)),
    joinString=' ',
    adjacent=False,
)

# https://github.com/fhightower/ioc-finder/issues/13
# TODO: improve the windows_file_path grammar - it is pretty naive right now... the file_ending is very basic and it would be nice to have a list of common file endings, the windows_file_path grammar assumes that a path will not have a '.' in it (other than in the file name at the end), and the windows_file_path grammar assumes that the path will have a file name at the end (it will not match directory paths well)
file_ending = Word(alphas, max=5)
# <drive letter>:<anything printable or a space, except '.'>.<ending>
windows_file_path = alphanum_word_start + Combine(
    Word(alphanums, exact=1) + ':' + Word(printables.replace('.', '') + ' ') + '.' + file_ending)

# we need to add '/' and '~' to the alphanum_word_start so that the grammar will match words starting with '/' and '~'
# we add ':' to the alphanum_word_start because we want to avoid parsing urls as file paths (e.g. "//twitter.com" from "https://twitter.com/")
# (a deep copy is modified so that alphanum_word_start itself stays untouched)
unix_file_path_wordstart = copy.deepcopy(alphanum_word_start)
unix_file_path_wordstart.wordChars.add(':')
unix_file_path_wordstart.wordChars.add('/')
unix_file_path_wordstart.wordChars.add('~')
# Starts with '~' or '/', ends with '.<ending>'; matches containing '//' are rejected.
unix_file_path = unix_file_path_wordstart + Combine(
    Or(['~', '/']) + Word(printables.replace('.', '') + ' ') + '.' + file_ending
).addCondition(lambda tokens: '//' not in tokens[0])
file_path = Or([windows_file_path, unix_file_path]) + alphanum_word_end

# be aware that the phone_number grammar assumes that the text being sent to it has been reversed
from pyparsing import Word, printables, nestedExpr, Group, Forward, alphas, originalTextFor, Optional, Literal

# Command line: <INPUT_FILE> <OUTPUT_FILE>
parser = argparse.ArgumentParser(description='Statically detect literal pairs')
parser.add_argument('input_file', metavar='<INPUT_FILE>', help='SMT-LIB input')
parser.add_argument(
    'output_file',
    metavar='<OUTPUT_FILE>',
    help='SMT-LIB output -- same as input file with additional assertions')
args = parser.parse_args()
input_file = args.input_file
output_file = args.output_file

enclosed = Forward()
# A symbol is any run of printable characters other than parentheses.
# Each parenthesis is removed on its own: stripping the two-character string
# "()" only worked because '(' and ')' happen to sit next to each other in
# `printables`.
svar = Word(printables.replace("(", "").replace(")", ""))
bvoneorzero = (Literal("#b1") | Literal("#b0"))
# (= <symbol or nested expression> #b0|#b1), kept as the original source text
eqpat = originalTextFor(nestedExpr(content="= " + enclosed))
# (not (= ...)), kept as the original source text
neqpat = originalTextFor("(not " + eqpat + ")")
enclosed << (svar | nestedExpr(opener='(', closer=')', content=None)) + bvoneorzero
detectneq = (eqpat | neqpat)

# tags
POSTAG = 1  # uses one to encode a positive equality
NEGTAG = 2  # uses two to encode a negated equality
BOTHTAG = 3  # three encodes both the positive and negative equalities occur (add an assertion comparing the two)

# Maps a bit value to its complement.
negval = {0: 1, 1: 0}
else: if t in ('or', 'and'): # Set the new op and go to next token operation = t elif t == '-': # Next tokens needs to be negated negation = True else: # Append to query the token if negation: t = ~t if operation == 'or': query |= t else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine( Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence( TERM, [(OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT),
# Punctuation that is matched but dropped from the parse results.
DQ = Suppress('"')
SP = Suppress(" ")
CD = Suppress(',')
CN = Suppress(':')
ARROW_S = Suppress('->')

# Date/time pieces of a syslog-style timestamp.
MONTH = oneOf("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec")
DAY = Word(nums)
SL_TIME = Combine(Word(nums)+":"+Word(nums)+":"+Word(nums))

# Any run of 'x' and hex digits (the position of the 'x' is not checked).
FULL_HEX = Word('x'+hexnums)
PORT = Word(nums)
# Four dot-separated digit runs (octet ranges are not validated).
IP4_ADDRESS = Combine(Word(nums) + ('.' + Word(nums))*3)
# Any run of hex digits and colons (structure is not validated).
IP6_ADDRESS = Word(hexnums+':')
# Printable characters without the comma.
PRINTABLES_NO_CD = printables.replace(',', '')
HOSTNAME = Word(alphanums+'.-_')("HOSTNAME")

# Generic value tokens.
INT = Word(nums)
HEX = Word(hexnums)
WORD = Word(alphanums)
TEXT = Group(OneOrMore(Word(printables)))
EMPTY = Empty()
# A possibly absent, comma-free field.
DATA_NO_CD = Optional(Word(PRINTABLES_NO_CD))

# "app[pid]: " - note that Word("[") matches one or more '[' and Word("]: ")
# matches any run of ']', ':' and ' ' characters.
SYSLOG_PROC = Combine(WORD("app") + Word("[") + INT("pid") + Word("]: "))
# "app: " - Word(": ") matches any run of ':' and ' ' characters.
SYSLOG_APP = Combine(WORD("app") + Word(": "))
# "<month> <day> <hh:mm:ss>" with single spaces between the parts.
SYSLOG_TS = Combine(MONTH+" "+DAY+" "+SL_TIME)
USERNAME = Word(alphanums)
def parse_query_string(self, query_string):
    # pylint: disable=too-many-locals
    """
    Parse the query string, extracting the information for limit, offset,
    ordering, filters, attribute and extra projections.

    The query string is a ``&``-separated list of fields of the form
    ``key<operator>value``; ``=in=``/``=notin=`` and ``=`` also accept a
    comma-separated list of values. Values may be quoted strings, ``true`` /
    ``false``, datetimes, numbers, or (optionally signed) keys for ordering.

    :param query_string: the query string (as obtained from request.query_string)
    :return: the result of ``self.build_translator_parameters`` applied to the
        parsed fields (fields whose key is ``_`` are dropped first)
    :raises RestInputValidationError: if the query string does not match the
        grammar or a datetime value cannot be parsed
    """
    from pyparsing import Word, alphas, nums, alphanums, printables, \
        ZeroOrMore, OneOrMore, Suppress, Optional, Literal, Group, \
        QuotedString, Combine, \
        StringStart as SS, StringEnd as SE, \
        WordEnd as WE, \
        ParseException

    from pyparsing import pyparsing_common as ppc
    from dateutil import parser as dtparser
    from psycopg2.tz import FixedOffsetTimezone

    ## Define grammar
    # key types
    key = Word(f'{alphas}_', f'{alphanums}_')
    # operators (multi-character operators are listed before their prefixes)
    operator = (Literal('=like=') | Literal('=ilike=') | Literal('=in=') | Literal('=notin=') | Literal('=') |
                Literal('!=') | Literal('>=') | Literal('>') | Literal('<=') | Literal('<'))
    # Value types
    value_num = ppc.number
    # The token is the matched text, so it must be compared with 'true':
    # bool('false') would be True because any non-empty string is truthy.
    value_bool = (Literal('true') | Literal('false')).addParseAction(lambda toks: toks[0] == 'true')
    value_string = QuotedString('"', escQuote='""')
    value_orderby = Combine(Optional(Word('+-', exact=1)) + key)

    ## DateTimeShift value. First, compose the atomic values and then
    # combine them and convert them to datetime objects
    # Date
    value_date = Combine(
        Word(nums, exact=4) + Literal('-') + Word(nums, exact=2) + Literal('-') + Word(nums, exact=2)
    )
    # Time
    value_time = Combine(
        Literal('T') + Word(nums, exact=2) + Optional(Literal(':') + Word(nums, exact=2)) +
        Optional(Literal(':') + Word(nums, exact=2))
    )
    # Shift
    value_shift = Combine(Word('+-', exact=1) + Word(nums, exact=2) + Optional(Literal(':') + Word(nums, exact=2)))
    # Combine atomic values
    value_datetime = Combine(
        value_date + Optional(value_time) + Optional(value_shift) + WE(printables.replace('&', ''))
        # To us the word must end with '&' or end of the string
        # Adding WordEnd only here is very important. This makes atomic
        # values for date, time and shift not really
        # usable alone individually.
    )

    ########################################################################

    def validate_time(toks):
        """
        Function to convert datetime string into datetime object. The format is
        compliant with ParseAction requirements

        :param toks: datetime string passed in tokens
        :return: DatetimePrecision wrapping the timezone-aware datetime object
            and the precision found in the string
        """

        datetime_string = toks[0]

        # Check the precision: the number of components once the date/time
        # separator 'T' is treated like ':'
        precision = len(datetime_string.replace('T', ':').split(':'))

        # Parse
        try:
            dtobj = dtparser.parse(datetime_string)
        except ValueError as exc:
            raise RestInputValidationError(
                'time value has wrong format. The '
                'right format is '
                '<date>T<time><offset>, '
                'where <date> is expressed as '
                '[YYYY]-[MM]-[DD], '
                '<time> is expressed as [HH]:[MM]:['
                'SS], '
                '<offset> is expressed as +/-[HH]:['
                'MM] '
                'given with '
                'respect to UTC'
            ) from exc
        if dtobj.tzinfo is not None and dtobj.utcoffset() is not None:
            tzoffset_minutes = int(dtobj.utcoffset().total_seconds() // 60)
            return DatetimePrecision(
                dtobj.replace(tzinfo=FixedOffsetTimezone(offset=tzoffset_minutes, name=None)), precision
            )

        # No usable offset in the input: use a zero offset.
        return DatetimePrecision(dtobj.replace(tzinfo=FixedOffsetTimezone(offset=0, name=None)), precision)

    ########################################################################

    # Convert datetime value to datetime object
    value_datetime.setParseAction(validate_time)

    # More General types
    value = (value_string | value_bool | value_datetime | value_num | value_orderby)
    # List of values (I do not check the homogeneity of the types of values,
    # query builder will do it somehow)
    value_list = Group(value + OneOrMore(Suppress(',') + value) + Optional(Suppress(',')))

    # Fields
    single_field = Group(key + operator + value)
    list_field = Group(key + (Literal('=in=') | Literal('=notin=')) + value_list)
    orderby_field = Group(key + Literal('=') + value_list)
    field = (list_field | orderby_field | single_field)

    # Fields separator
    separator = Suppress(Literal('&'))

    # General query string
    general_grammar = SS() + Optional(field) + ZeroOrMore(
        separator + field) + \
                      Optional(separator) + SE()

    ## Parse the query string
    try:
        fields = general_grammar.parseString(query_string)

        # JQuery adds _=timestamp a parameter to not use cached data/response.
        # To handle query, remove this "_" parameter from the query string
        # For more details check issue #789
        # (https://github.com/aiidateam/aiida-core/issues/789) in aiida-core
        field_list = [entry for entry in fields.asList() if entry[0] != '_']

    except ParseException as err:
        raise RestInputValidationError(
            'The query string format is invalid. '
            "Parser returned this massage: \"{"
            "}.\" Please notice that the column "
            'number '
            'is counted from '
            'the first character of the query '
            'string.'.format(err)
        ) from err

    ## return the translator instructions elaborated from the field_list
    return self.build_translator_parameters(field_list)
def _create_grammar():
    """Build and return the pyparsing grammar for a DBC file.

    The returned expression matches one or more top-level entries
    followed by the end of the input. Entries are built with the ``-``
    operator, so once the leading keyword of an entry has matched, a
    mismatch later in that entry is reported as an error instead of
    silently trying the next alternative.

    """

    def hidden(char):
        # Punctuation that must be present but is dropped from the results.
        return Suppress(Literal(char))

    def value_descriptions():
        # One or more `<integer> "<text>"` pairs, each in its own group.
        return Group(OneOrMore(Group(integer + QuotedString())))

    def definition_values():
        # Tail of an attribute definition: nothing at all, a list of
        # quoted strings (separating commas optional) or a list of
        # numbers -- always terminated by ';'.
        return (scolon
                | (Group(ZeroOrMore(Group(
                    (comma | Empty()) + QuotedString()))) + scolon)
                | (Group(ZeroOrMore(number)) + scolon))

    # Value tokens.
    word = Word(''.join(c for c in printables if c not in ';:'))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums).setName('positive integer')
    number = Word(nums + '.Ee-+')
    sign = Literal('+') | Literal('-')
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    frame_id = Word(nums).setName('frame id')

    # Suppressed punctuation.
    colon = hidden(':')
    scolon = hidden(';')
    pipe = hidden('|')
    at = hidden('@')
    lp = hidden('(')
    rp = hidden(')')
    lb = hidden('[')
    rb = hidden(']')
    comma = hidden(',')

    # File header sections.
    version = Group(Keyword('VERSION') - QuotedString()).setName(VERSION)

    symbol = Word(alphas + '_') + Suppress(LineEnd())

    symbols = Group(
        Keyword('NS_') - colon - Group(ZeroOrMore(symbol))).setName('NS_')

    discard = Suppress(Keyword('BS_') - colon).setName('BS_')

    nodes = Group(
        Keyword('BU_') - colon - Group(ZeroOrMore(node))).setName('BU_')

    # Messages and the signals nested inside them.
    signal = Group(
        Keyword(SIGNAL)
        - Group(word + Optional(word))
        - colon
        - Group(positive_integer
                - pipe
                - positive_integer
                - at
                - positive_integer
                - sign)
        - Group(lp - number - comma - number - rp)
        - Group(lb - number - pipe - number - rb)
        - QuotedString()
        - Group(delimitedList(node))).setName(SIGNAL)

    message = Group(
        Keyword(MESSAGE)
        - frame_id
        - word
        - colon
        - positive_integer
        - word
        - Group(ZeroOrMore(signal))).setName(MESSAGE)

    # Events are recognised but produce no tokens.
    event = Suppress(
        Keyword(EVENT)
        - word
        - colon
        - positive_integer
        - lb
        - number
        - pipe
        - number
        - rb
        - QuotedString()
        - number
        - number
        - word
        - node
        - scolon).setName(EVENT)

    # A comment targets a signal, message, event or node, or nothing.
    signal_comment = (
        Keyword(SIGNAL) - frame_id - word - QuotedString() - scolon)
    message_comment = (
        Keyword(MESSAGE) - frame_id - QuotedString() - scolon)
    event_comment = (
        Keyword(EVENT) - word - QuotedString() - scolon)
    node_comment = (
        Keyword(NODES) - word - QuotedString() - scolon)
    bare_comment = (QuotedString() - scolon)

    comment = Group(
        Keyword(COMMENT)
        - (signal_comment.setName(SIGNAL)
           | message_comment.setName(MESSAGE)
           | event_comment.setName(EVENT)
           | node_comment.setName(NODES)
           | bare_comment.setName('QuotedString'))).setName(COMMENT)

    # Attribute definitions, defaults and values.
    attribute_definition = Group(
        Keyword(ATTRIBUTE_DEFINITION)
        - ((QuotedString())
           | (Keyword(SIGNAL)
              | Keyword(MESSAGE)
              | Keyword(EVENT)
              | Keyword(NODES))
           + QuotedString())
        - word
        - definition_values()).setName(ATTRIBUTE_DEFINITION)

    attribute_definition_default = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT)
        - QuotedString()
        - (number | QuotedString())
        - scolon).setName(ATTRIBUTE_DEFINITION_DEFAULT)

    attribute = Group(
        Keyword(ATTRIBUTE)
        - QuotedString()
        - Group(Optional((Keyword(MESSAGE) + frame_id)
                         | (Keyword(SIGNAL) + frame_id + word)
                         | (Keyword(NODES) + word)))
        - (QuotedString() | number)
        - scolon).setName(ATTRIBUTE)

    # Value descriptions, either bound to a signal or as a named table.
    choice = Group(
        Keyword(CHOICE)
        - Group(Optional(frame_id))
        - word
        - value_descriptions()
        - scolon).setName(CHOICE)

    value_table = Group(
        Keyword(VALUE_TABLE)
        - word
        - value_descriptions()
        - scolon).setName(VALUE_TABLE)

    signal_type = Group(
        Keyword(SIGNAL_TYPE)
        - frame_id
        - word
        - colon
        - positive_integer
        - scolon).setName(SIGNAL_TYPE)

    # Only the first number of each `<n>-<m>` range is kept in the results.
    signal_multiplexer_values = Group(
        Keyword(SIGNAL_MULTIPLEXER_VALUES)
        - frame_id
        - word
        - word
        - Group(delimitedList(positive_integer
                              - Suppress('-')
                              - Suppress(positive_integer)))
        - scolon).setName(SIGNAL_MULTIPLEXER_VALUES)

    message_add_sender = Group(
        Keyword(MESSAGE_TX_NODE)
        - frame_id
        - colon
        - Group(delimitedList(node))
        - scolon).setName(MESSAGE_TX_NODE)

    # Relation attributes.
    attribute_definition_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_REL)
        - (QuotedString()
           | (Keyword(NODES_REL) + QuotedString()))
        - word
        - definition_values()).setName(ATTRIBUTE_DEFINITION_REL)

    attribute_definition_default_rel = Group(
        Keyword(ATTRIBUTE_DEFINITION_DEFAULT_REL)
        - QuotedString()
        - (number | QuotedString())
        - scolon).setName(ATTRIBUTE_DEFINITION_DEFAULT_REL)

    attribute_rel = Group(
        Keyword(ATTRIBUTE_REL)
        - QuotedString()
        - Keyword(NODES_REL)
        - word
        - Keyword(SIGNAL)
        - frame_id
        - word
        - (positive_integer | QuotedString())
        - scolon).setName(ATTRIBUTE_REL)

    signal_group = Group(
        Keyword(SIGNAL_GROUP)
        - frame_id
        - word
        - integer
        - colon
        - OneOrMore(word)
        - scolon).setName(SIGNAL_GROUP)

    # Alternatives are tried in the order listed.
    entry = (message
             | comment
             | attribute
             | choice
             | attribute_definition
             | attribute_definition_default
             | attribute_rel
             | attribute_definition_rel
             | attribute_definition_default_rel
             | signal_group
             | event
             | message_add_sender
             | value_table
             | signal_type
             | signal_multiplexer_values
             | discard
             | nodes
             | symbols
             | version)

    # Frame ids are delivered as ints rather than digit strings.
    frame_id.setParseAction(lambda _s, _l, t: int(t[0]))

    return OneOrMore(entry) + StringEnd()
else: if t in ("or", "and"): # Set the new op and go to next token operation = t elif t == "-": # Next tokens needs to be negated negation = True else: # Append to query the token if negation: t = ~t if operation == "or": query |= t else: query &= t return query NO_BRTS = printables.replace("(", "").replace(")", "") SINGLE = Word(NO_BRTS.replace("*", "")) WILDCARDS = Optional("*") + SINGLE + Optional("*") + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral("and") OPER_OR = CaselessLiteral("or") OPER_NOT = "-" TERM = Combine( Optional(Word(alphas).setResultsName("meta") + ":") + (QUOTED.setResultsName("query") | WILDCARDS.setResultsName("query")) ) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence(
def _create_grammar_6_0():
    """Build and return the pyparsing grammar for SYM format version 6.0.

    The grammar expects a ``FormatVersion=6.0`` line, a ``Title`` line
    and then one or more sections, up to the end of the input.
    ``//`` comments are ignored.

    """

    def hidden(char):
        # Punctuation that must be present but is dropped from the results.
        return Suppress(Literal(char))

    def sig_option(tag):
        # A signal option such as `/u:<word>`, kept as a [tag, word] group.
        return Group(Literal(tag) + word)

    def optional_setting(key):
        # `<key>=<positive integer>`; yields an empty group when absent.
        return Group(Optional(Keyword(key) + assign + positive_integer))

    def section_of(header, item):
        # A section header followed by a (possibly empty) group of items.
        return Group(Keyword(header) + Group(ZeroOrMore(item)))

    # Value tokens.
    word = Word(''.join(c for c in printables if c not in ';:'))
    positive_integer = Word(nums)
    number = Word(nums + '.Ee-+')
    name = Word(alphas + nums + '_-').setWhitespaceChars(' ')
    type_ = name

    # Suppressed punctuation.
    lp = hidden('(')
    rp = hidden(')')
    lb = hidden('[')
    rb = hidden(']')
    assign = hidden('=')
    comma = hidden(',')

    # File header.
    version = Group(Keyword('FormatVersion')
                    - assign
                    - Keyword('6.0'))
    title = Group(Keyword('Title')
                  - assign
                  - QuotedString('"'))

    # Enumerations: Enum=<name>(<number>="<text>", ...)
    enum_value = Group(number
                       + assign
                       + QuotedString('"'))
    enum = Group(Suppress(Keyword('Enum'))
                 - assign
                 - name
                 - Suppress(lp)
                 + Group(delimitedList(enum_value))
                 - Suppress(rp))

    # Signal definitions with their optional `/x:` options.
    sig_unit = sig_option('/u:')
    sig_factor = sig_option('/f:')
    sig_offset = sig_option('/o:')
    sig_min = sig_option('/min:')
    sig_max = sig_option('/max:')
    sig_default = sig_option('/d:')
    sig_long_name = sig_option('/ln:')
    sig_enum = sig_option('/e:')

    signal = Group(Suppress(Keyword('Sig'))
                   - Suppress(assign)
                   - name
                   - type_
                   + Group(Optional(positive_integer))
                   + Group(Optional(Keyword('-m')))
                   + Group(Optional(sig_unit)
                           + Optional(sig_factor)
                           + Optional(sig_offset)
                           + Optional(sig_min)
                           + Optional(sig_max)
                           + Optional(sig_default)
                           + Optional(sig_long_name)
                           + Optional(sig_enum)))

    # A `[name]` block with its settings and signal references.
    symbol = Group(Suppress(lb)
                   - name
                   - Suppress(rb)
                   - Group(Optional(Keyword('ID') + assign + word))
                   - Group(Keyword('Len') + assign + positive_integer)
                   + Group(Optional(Keyword('Mux')
                                    + assign
                                    + word
                                    + positive_integer
                                    + comma
                                    + positive_integer
                                    + positive_integer))
                   + optional_setting('CycleTime')
                   + optional_setting('Timeout')
                   + optional_setting('MinInterval')
                   + Group(ZeroOrMore(Group(Keyword('Sig')
                                            + assign
                                            + name
                                            + positive_integer))))

    # Sections.
    enums = section_of('{ENUMS}', enum)
    signals = section_of('{SIGNALS}', signal)
    send = section_of('{SEND}', symbol)
    receive = section_of('{RECEIVE}', symbol)
    sendreceive = section_of('{SENDRECEIVE}', symbol)

    section = (enums
               | signals
               | send
               | receive
               | sendreceive)

    grammar = (version
               - title
               + Group(OneOrMore(section))
               + StringEnd())
    grammar.ignore(dblSlashComment)

    return grammar
def _create_dbc_grammar():
    """Build and return the pyparsing grammar for a DBC file.

    The returned expression matches one or more top-level entries
    followed by the end of the input.

    """

    def hidden(char):
        # Punctuation that must be present but is dropped from the results.
        return Suppress(Literal(char))

    def text():
        # A fresh double-quoted string matcher that may span several lines.
        return QuotedString('"', multiline=True)

    def value_descriptions():
        # One or more `<integer> "<text>"` pairs, each in its own group.
        return Group(OneOrMore(Group(integer + text())))

    # Value tokens.
    word = Word(''.join(c for c in printables if c not in ';:'))
    integer = Group(Optional('-') + Word(nums))
    positive_integer = Word(nums)
    number = Word(nums + '.Ee-+')
    sign = Literal('+') | Literal('-')
    node = Word(alphas + nums + '_-').setWhitespaceChars(' ')

    # Suppressed punctuation.
    colon = hidden(':')
    scolon = hidden(';')
    pipe = hidden('|')
    at = hidden('@')
    lp = hidden('(')
    rp = hidden(')')
    lb = hidden('[')
    rb = hidden(']')
    comma = hidden(',')

    # File header sections.
    version = Group(Keyword('VERSION') + text())
    symbol = Word(alphas + '_') + Suppress(LineEnd())
    symbols = Group(Keyword('NS_') + colon + Group(ZeroOrMore(symbol)))
    discard = Suppress(Keyword('BS_') + colon)
    nodes = Group(Keyword('BU_') + colon + Group(ZeroOrMore(node)))

    # Messages and the signals nested inside them.
    signal = Group(Keyword(SIGNAL)
                   + Group(word + Optional(word))
                   + colon
                   + Group(positive_integer
                           + pipe
                           + positive_integer
                           + at
                           + positive_integer
                           + sign)
                   + Group(lp + number + comma + number + rp)
                   + Group(lb + number + pipe + number + rb)
                   + text()
                   + Group(delimitedList(node)))

    message = Group(Keyword(MESSAGE)
                    + positive_integer
                    + word
                    + colon
                    + positive_integer
                    + word
                    + Group(ZeroOrMore(signal)))

    # Events are recognised but produce no tokens.
    event = Suppress(Keyword(EVENT)
                     + word
                     + colon
                     + positive_integer
                     + lb
                     + number
                     + pipe
                     + number
                     + rb
                     + text()
                     + number
                     + number
                     + word
                     + node
                     + scolon)

    # A comment targets a message, signal, node or event.
    comment = Group(Keyword(COMMENT)
                    + ((Keyword(MESSAGE)
                        + positive_integer
                        + text()
                        + scolon)
                       | (Keyword(SIGNAL)
                          + positive_integer
                          + word
                          + text()
                          + scolon)
                       | (Keyword(NODES)
                          + word
                          + text()
                          + scolon)
                       | (Keyword(EVENT)
                          + word
                          + text()
                          + scolon)))

    # Attribute entry: an optional object-kind keyword, a quoted name, a
    # word, then either nothing, a list of quoted strings (separating
    # commas optional) or a list of numbers, terminated by ';'.
    attribute = Group(Keyword(ATTRIBUTE)
                      + ((text())
                         | (Keyword(SIGNAL) + text())
                         | (Keyword(MESSAGE) + text())
                         | (Keyword(EVENT) + text())
                         | (Keyword(NODES) + text()))
                      + word
                      + ((scolon)
                         | (Group(ZeroOrMore(Group(
                             (comma | Empty()) + text()))) + scolon)
                         | (Group(ZeroOrMore(number)) + scolon)))

    default_attr = Group(Keyword(DEFAULT_ATTR)
                         + text()
                         + (positive_integer | text())
                         + scolon)

    attr_definition = Group(Keyword(ATTR_DEFINITION)
                            + text()
                            + Group(Optional(
                                (Keyword(MESSAGE) + positive_integer)
                                | (Keyword(SIGNAL) + positive_integer + word)
                                | (Keyword(NODES) + word)))
                            + (text() | positive_integer)
                            + scolon)

    # Value descriptions, either bound to a signal or as a named table.
    choice = Group(Keyword(CHOICE)
                   + Optional(positive_integer)
                   + word
                   + value_descriptions()
                   + scolon)

    value_table = Group(Keyword(VALUE_TABLE)
                        + word
                        + value_descriptions()
                        + scolon)

    # Alternatives are tried in the order listed.
    entry = (version
             | symbols
             | discard
             | nodes
             | message
             | comment
             | attribute
             | default_attr
             | attr_definition
             | choice
             | value_table
             | event)

    return OneOrMore(entry) + StringEnd()
else: if t in ('or', 'and'): # Set the new op and go to next token operation = t elif t == '-': # Next tokens needs to be negated negation = True else: # Append to query the token if negation: t = ~t if operation == 'or': query |= t else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine(Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence(TERM, [ (OPER_NOT, 1, opAssoc.RIGHT),
import sys
from logging import critical

from pyparsing import Optional, Literal, Word, Group
from pyparsing import Suppress, Combine, replaceWith
from pyparsing import alphas, nums, printables, alphanums
from pyparsing import restOfLine, oneOf, OneOrMore, ZeroOrMore
from pyparsing import ParseException

__all__ = ("extract_html_line", "extract_html_lines", "RTFParserException",
           "parse", "write_html_document")


class RTFParserException(Exception):
    """Signals that parsing of RTF input failed."""


# Every printable character except < > \ { }, plus space and tab.
htmchars = "".join(ch for ch in printables if ch not in "<>\\{}") + " \t"

SEP = Literal(';')

# Group delimiters.
BRCKT_L = Literal('{')
BRCKT_R = Literal('}')
BRCKT = (BRCKT_L | BRCKT_R).setName("Bracket")

# basic RTF control codes, ie. "\labelname3434": a backslash, a label made
# of letters/apostrophes and an optional run of digits, all as one token
CTRL_LABEL = Combine(Word(alphas + "'") + Optional(Word(nums)))
BASE_CTRL = Combine(Literal('\\') + CTRL_LABEL)
# in some rare cases (color table declarations), control has ' ;' suffix;
# the suffixed form is tried first so the ';' ends up in the same token
BASE_CTRL = (Combine(BASE_CTRL + SEP) | BASE_CTRL).setName("BaseControl")
# Pieces shared by the single- and double-quoted string expressions.
_bslash = "\\"
_sglQuote = Literal("'")
_dblQuote = Literal('"')
_escapables = printables
# A backslash followed by exactly one printable character.
_escapedChar = Word(_bslash, _escapables, exact=2)

# Quoted strings stay on one line; inside them a quote may be written
# either backslash-escaped or doubled.
dblQuotedString = Combine(
    _dblQuote
    + ZeroOrMore(CharsNotIn('\\"\n\r') | _escapedChar | '""')
    + _dblQuote)
dblQuotedString = dblQuotedString.streamline().setName(
    "string enclosed in double quotes")

sglQuotedString = Combine(
    _sglQuote
    + ZeroOrMore(CharsNotIn("\\'\n\r") | _escapedChar | "''")
    + _sglQuote)
sglQuotedString = sglQuotedString.streamline().setName(
    "string enclosed in single quotes")

# A quoted argument is delivered without its surrounding quotes.
quotedArg = (dblQuotedString | sglQuotedString).setParseAction(removeQuotes)
quotedArg.setName("quotedArg")

# An unquoted argument: printable characters other than '#' and quotes.
plainArgChars = "".join(ch for ch in printables if ch not in "#\"'")
plainArg = Word(plainArgChars).setName("plainArg")

# All arguments are collected into one group named 'arguments'.
arguments = Group(ZeroOrMore(quotedArg | plainArg)).setResultsName('arguments')
arguments.setName("arguments")

# comment line: '#' up to the end of the line, dropped from the results.
comment = (Literal('#') + restOfLine).suppress()
comment.setName('comment')

# A line is either a lone comment or a command with its arguments and an
# optional trailing comment.
full_command = (comment | (command + arguments + Optional(comment)))
full_command.setName('full_command')
nums, asKeyword=True).addCondition(lambda tokens: int(tokens[0]) < 256) # basically, the grammar below says: start any words that start with a '.' or a number; I want to match words that start with a '.' because this will fail later in the grammar and I do not want to match anything that start with a '.' ipv4_address = alphanum_word_start + WordStart('.' + nums) + Combine( (ipv4_section + '.') * 3 + ipv4_section) + NotAny( Regex('\.\S')) + alphanum_word_end hexadectet = Word(hexnums, min=1, max=4) ipv6_address_full = alphanum_word_start + Combine((hexadectet + ":") * 7 + hexadectet) # todo: the ipv6_address_shortened grammar needs some fine-tuning so it doesn't pull in content too broadly ipv6_address_shortened = Combine( OneOrMore(Or([hexadectet + Word(':'), Word(':')])) + hexadectet) ipv6_address = Or([ipv6_address_full, ipv6_address_shortened ]) + alphanum_word_end complete_email_comment = Combine('(' + Word(printables.replace(')', '')) + ')') # the complete_email_local_part grammar ignores the fact that characters like <<<(),:;<>@[\] >>> are possible in a quoted complete_email_local_part (and the double-quotes and backslash should be preceded by a backslash) complete_email_local_part = Combine( Optional(complete_email_comment)('email_address_comment') + Word(alphanums + "!#$%&'*+-/=?^_`{|}~." + '"') + Optional(complete_email_comment)('email_address_comment')) complete_email_address = Combine( complete_email_local_part('email_address_local_part') + "@" + Or([domain_name, '[' + ipv4_address + ']', '[IPv6:' + ipv6_address + ']'])('email_address_domain')) email_local_part = Word(alphanums + "+-_.") email_address = alphanum_word_start + Combine( email_local_part('email_address_local_part') + "@" + Or([domain_name, '[' + ipv4_address + ']', '[IPv6:' + ipv6_address + ']']) ('email_address_domain'))