Example #1
    def clear_sql(sql: str) -> str:
        ''' Remove comments from SQL.
            TODO: the current implementation does not remove /**/ from the
            middle of a string:
            select a, /*comment*/ from b
        '''
        # remove /*comment*/
        ParserElement.setDefaultWhitespaceChars(" \t")
        comment = nestedExpr('/*', '*/').suppress()
        starting = ZeroOrMore(comment.suppress())
        ending = ZeroOrMore(comment | ';').suppress() + StringEnd()
        expr = starting + SkipTo(ending) + ending
        sql = expr.transformString(sql)

        # remove -- and # comments
        oracleSqlComment = Literal("--") + restOfLine
        mySqlComment = Literal("#") + restOfLine

        expr = (originalTextFor(QuotedString("'"))
                | originalTextFor(QuotedString('"'))
                | originalTextFor(QuotedString('`'))
                | (oracleSqlComment | mySqlComment).suppress())

        sql = expr.transformString(sql)
        sql = sql.strip(' \n\t')

        return sql
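
A quick sanity check of the function above; it assumes clear_sql() is reachable at module scope (in its source it sits on a parser class) together with these pyparsing imports, and the sample SQL is made up:

from pyparsing import (ParserElement, nestedExpr, ZeroOrMore, StringEnd, SkipTo,
                       Literal, restOfLine, originalTextFor, QuotedString)

sql = "/* header */ SELECT a, b FROM t  -- trailing comment"
print(clear_sql(sql))  # -> SELECT a, b FROM t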
Example #2
    def parse_as_create_predictor(self) -> dict:
        CREATE, PREDICTOR, FROM, WHERE, PREDICT, AS, ORDER, GROUP, BY, WINDOW, HORIZON, USING, ASK, DESC = map(
            CaselessKeyword,
            "CREATE PREDICTOR FROM WHERE PREDICT AS ORDER GROUP BY WINDOW HORIZON USING ASK DESC"
            .split())
        ORDER_BY = ORDER + BY
        GROUP_BY = GROUP + BY

        word = Word(alphanums + "_")

        s_int = Word(nums).setParseAction(tokenMap(int))

        predict_item = Group(
            word('name') + Optional(AS.suppress() + word('alias')))

        order_item = Group(word('name') + Optional(ASK | DESC)('sort'))

        using_item = Group(
            word('name') + Word('=').suppress() +
            (word | QuotedString("'"))('value'))

        expr = (
            CREATE + PREDICTOR + word('predictor_name') + FROM +
            Optional(word)('integration_name') +
            originalTextFor(nestedExpr('(', ')'))('select') +
            Optional(AS + word('datasource_name')) + PREDICT +
            delimitedList(predict_item, delim=',')('predict') +
            Optional(ORDER_BY +
                     delimitedList(order_item, delim=',')('order_by')) +
            Optional(GROUP_BY + delimitedList(word, delim=',')('group_by')) +
            Optional(WINDOW + s_int('window')) +
            Optional(HORIZON + s_int('nr_predictions')) + Optional(
                (USING + delimitedList(using_item, delim=',')('using'))
                | (USING + originalTextFor(nestedExpr('{', '}'))('using'))))

        r = expr.parseString(self._sql)

        # postprocessing
        r = r.asDict()
        if r['select'].startswith('(') and r['select'].endswith(')'):
            r['select'] = r['select'][1:-1]
        r['select'] = r['select'].strip(' \n')

        using = r.get('using')
        if isinstance(using, str):
            r['using'] = json.loads(using)
        elif isinstance(using, list):
            new_using = {}
            for el in using:
                if el['name'] == 'stop_training_in_x_seconds':
                    new_using['time_aim'] = el['value']
                else:
                    new_using[el['name']] = el['value']
            r['using'] = new_using

        if isinstance(r.get('order_by'), list):
            r['order_by'] = [x['name'] for x in r['order_by']]

        return r
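
The USING clause above accepts either key=value pairs or a JSON object. A stand-alone sketch of just the key=value sub-grammar, rebuilt here with made-up input (this is not the class's actual entry point):

from pyparsing import Word, Group, QuotedString, delimitedList, alphanums

word = Word(alphanums + "_")
using_item = Group(word('name') + Word('=').suppress() +
                   (word | QuotedString("'"))('value'))
items = delimitedList(using_item).parseString(
    "stop_training_in_x_seconds='30', engine=lightwood")
print([(i['name'], i['value']) for i in items])
# -> [('stop_training_in_x_seconds', '30'), ('engine', 'lightwood')]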
Example #3
def parse_config_file(filepath):
    """
    This function defines that to parsed the netscalar input file
    :param filepath: path of netscalar input configuration
    :return: return parsed dict
    """

    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    key = Word('-',
               printables).setParseAction(lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(
        OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception as exception:
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no,
                                           total_lines,
                                           msg,
                                           prefix='Progress',
                                           suffix='')
        return result
Example #4
def _define_vs():
    KEY = Word(alphas + '_$', alphanums +
               '_$').setName('identifier').setResultsName('key')  # noqa
    VALUE = originalTextFor(_define_json()).setResultsName('value')
    # validator name, eg: int
    NAME = Optional(
        Optional(Suppress('?')) +
        pyparsing_common.identifier.setResultsName('name'))  # noqa
    # refers, eg: @xx@yy
    REFERS = Group(ZeroOrMore(Suppress('@') +
                              pyparsing_common.identifier)).setResultsName(
                                  'refers')  # noqa
    # args, eg: (), (1), (1,2,3), ([1,2], {"key":"value"}, "Any JSON")
    ARGS = Group(
        Optional(
            Suppress('(') + Optional(delimitedList(VALUE)) +
            Suppress(')'))).setResultsName('args')  # noqa
    # key-value, eg: key, key=True, key=[1,2,3]
    KW = Group(KEY + Optional(Suppress('=') + VALUE))
    # kwargs, eg: &key1&key2=True&key3=[1,2,3]
    KWARGS = Group(ZeroOrMore(Suppress('&') + KW)).setResultsName('kwargs')
    # lead xxx is key: xxx@yyy, xxx?yyy, $self&abc
    # lead xxx except '$self' is validator name: xxx(1,2), xxx&abc, xxx
    SELF = Literal('$self').setResultsName('key')
    VS_KEY = Optional((KEY + FollowedBy(Word('@?'))) | SELF)
    VS_DEF = REFERS + NAME + ARGS + KWARGS
    return StringStart() + VS_KEY + VS_DEF + StringEnd()
Example #5
    def parse_as_create_datasource(self) -> dict:
        ''' Parse 'CREATE DATASOURCE' query
            Example: CREATE DATASOURCE name FROM mysql WITH {"user": "******", "password": "******", "host": "127.0.0.1"}
        '''
        result = {
            'datasource_name': None,
            'database_type': None,
            'connection_args': None
        }

        expr = (CaselessKeyword("create").suppress() +
                CaselessKeyword("datasource").suppress() +
                Word(printables).setResultsName('datasource_name') +
                CaselessKeyword("from").suppress() +
                Word(printables).setResultsName('database_type') +
                CaselessKeyword("with").suppress() +
                originalTextFor(nestedExpr('{', '}'))('connection_args'))

        r = expr.parseString(self._sql).asDict()

        datasource_name = r.get('datasource_name')
        if not isinstance(datasource_name, str):
            raise Exception("Can't determine datasource name")
        result['datasource_name'] = datasource_name

        database_type = r.get('database_type')
        if not isinstance(database_type, str):
            raise Exception("Can't determine database type")
        result['database_type'] = database_type

        try:
            result['connection_args'] = json.loads(r.get('connection_args'))
        except Exception:
            raise Exception("Can't parse connection arguments.")

        return result
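
A stand-alone check of the same grammar, rebuilt inline with a made-up statement:

import json
from pyparsing import CaselessKeyword, Word, printables, nestedExpr, originalTextFor

expr = (CaselessKeyword("create").suppress() +
        CaselessKeyword("datasource").suppress() +
        Word(printables)('datasource_name') +
        CaselessKeyword("from").suppress() +
        Word(printables)('database_type') +
        CaselessKeyword("with").suppress() +
        originalTextFor(nestedExpr('{', '}'))('connection_args'))

r = expr.parseString('CREATE DATASOURCE mydb FROM mysql '
                     'WITH {"host": "127.0.0.1"}').asDict()
print(r['datasource_name'], r['database_type'], json.loads(r['connection_args']))
# -> mydb mysql {'host': '127.0.0.1'}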
Example #6
    def _parser_piece_text():
        """
        Return PyParsing element to the text of a markdown link.
        """
        # No double line breaks in markdown links
        double_line_break = (Word("\n\r", exact=1) + Optional(Word(" \t")) +
                             Word("\n\r", exact=1))

        # We will ignore escaped square brackets when match finding balanced
        # square brackets.
        ignore = Literal("\\[") | Literal("\\]")

        # The text parser will match text inside balanced brackets using the
        # nestedExpr helper function from PyParsing.
        #
        # Next we define the content that is allowed inside the brackets.
        content_character = ~FollowedBy(double_line_break) + CharsNotIn(
            "[]", exact=1)
        # Normally with nestedExpr, the content parser would be separately applied
        # to each whitespace-separated string within the nested expression.
        # However, since we set whitespaceChars to '', the content parser is
        # applied to characters one-at-a-time.
        #
        # If this ever changes, we would need to change content to something
        # like Combine(OneOrMore(~ignore + content_character))
        content = content_character
        text = originalTextFor(
            nestedExpr(
                opener="[",
                closer="]",
                content=content,
                ignoreExpr=ignore,
            )).setResultsName("text")
        text.addParseAction(lambda s, l, toks: toks[0][1:-1])
        return text
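
A stand-alone sketch of the balanced-bracket idea above, dropping the double-line-break guard for brevity (the sample link text is made up):

from pyparsing import originalTextFor, nestedExpr, Literal, CharsNotIn

ignore = Literal("\\[") | Literal("\\]")
text = originalTextFor(
    nestedExpr("[", "]", content=CharsNotIn("[]", exact=1), ignoreExpr=ignore))
text.addParseAction(lambda s, l, toks: toks[0][1:-1])
print(text.parseString("[a [nested] link]")[0])  # -> a [nested] link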
Example #7
def func_tokens(dictionary, parse_action):
    func_name = Word(alphas+'_', alphanums+'_')

    func_ident = Combine('$' + func_name.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        index = int(tokens.num)
        return dictionary.get(index, u'')

    rx_tok.setParseAction(replace_token)

    strip = lambda s, l, tok: tok[0].strip()
    text_tok = CharsNotIn(u',').setParseAction(strip)
    quote_tok = QuotedString('"')

    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))

    return func_tok, arglist, rx_tok
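
A hypothetical driver for func_tokens(); the dictionary, parse action and inputs are all made up, and the pyparsing names used above are assumed to be imported:

func_tok, arglist, rx_tok = func_tokens(
    {1: u'world'},
    lambda s, l, toks: u'[call:%s%s]' % (toks.funcname, toks.args))
print(rx_tok.transformString(u'hello $1'))           # -> hello world
print(func_tok.transformString(u'say $upper(abc) now'))  # -> say [call:upper(abc)] now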
Example #8
def asn1_loads(asn1_str):
    """
    Parse an ASN.1 file
    
    This is currently Pseudo-ASN; modify to become actual ASN.1
    """

    # ASN.1 grammar
    identifier = pp.Word(pp.alphas + "_")
    assign = pp.Literal("::=")
    # typedef = identifier.setName("typeref") + assign + identifier.setName("basetype")
    comment1 = pp.Literal("#") + pp.originalTextFor(pp.SkipTo(pp.LineEnd()))
    # typelist = pp.OneOrMore(typedef)
    meta1 = pp.LineStart() + identifier + pp.Literal(":") + pp.SkipTo(
        pp.LineEnd()).setDebug()
    meta2 = pp.LineStart() + pp.White() + pp.SkipTo(pp.LineEnd()).setDebug()
    metaval = meta1 + pp.ZeroOrMore(meta2)
    # metalist = pp.ZeroOrMore(comment1) + pp.Literal("/*") + pp.OneOrMore(metaval) + pp.Literal("*/")
    metalist = pp.SkipTo(pp.Literal("/*")).setDebug() + pp.Literal(
        "/*") + pp.OneOrMore(metaval).setDebug() + pp.Literal("*/")

    asn1 = metalist.parseString(asn1_str, parseAll=False)
    print(asn1)
    jaen = {"meta": {}, "types": []}
    return jaen
Example #9
    def postParse(self, instring, loc, tokenList):
        if self.evalfn:
            res = Expr(self.name)
            res._evalfn = MethodType(self.evalfn, res)
        else:
            res = CompValue(self.name)
            if self.name == "ServiceGraphPattern":
                # Then this must be a service graph pattern and have
                # already matched.
                # let's assume there is one, for now, then test for two later.
                sgp = originalTextFor(self.expr)
                service_string = sgp.searchString(instring)[0][0]
                res["service_string"] = service_string

        for t in tokenList:
            if isinstance(t, ParamValue):
                if t.isList:
                    if t.name not in res:
                        res[t.name] = plist()
                    res[t.name].append(t.tokenList)
                else:
                    res[t.name] = t.tokenList
                # res.append(t.tokenList)
            # if isinstance(t,CompValue):
            #    res.update(t)
        return res
Example #10
def func_tokens(dictionary, parse_action):
    func_name = Word(alphas + '_', alphanums + '_')

    func_ident = Combine('$' + func_name.copy()('funcname'))
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    func_tok.enablePackrat()

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        index = int(tokens.num)
        return dictionary.get(index, '')

    rx_tok.setParseAction(replace_token)

    strip = lambda s, l, tok: tok[0].strip()
    text_tok = CharsNotIn(',').setParseAction(strip)
    quote_tok = QuotedString('"')

    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))

    return func_tok, arglist, rx_tok
Example #11
def parse_template(template_text):
    identifier = Word(alphas, alphanums + '_')

    param = Group(identifier('name') + Suppress(':') + CharsNotIn(',)')('value'))
    param_list = Group(Suppress('(') + delimitedList(param, delim=',') + Suppress(')'))

    benchmark_id = originalTextFor(identifier + '.' + identifier + '.' + identifier)
    measurement_id = Group(benchmark_id('benchmark') + Optional(param_list('params')) + Suppress('[') + identifier('local_id') + Suppress(']'))

    macro = Group(Suppress('${') + measurement_id('measurement') + Suppress('}'))
    raw_text_block = originalTextFor(CharsNotIn('$'))

    text = ZeroOrMore(Group(raw_text_block('text') | macro('macro')))('template')

    text.leaveWhitespace()
    return text.parseString(template_text).asDict()
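
An example invocation; the template string is made up, and parse_template() above is assumed to be in scope with its pyparsing imports:

result = parse_template("elapsed: ${suite.io.read(size:4k)[run1]} ms")
print(result)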
Example #12
def parse_config_file(filepath):
    """
    This function defines that to parsed the netscalar input file
    :param filepath: path of netscalar input configuration
    :return: return parsed dict
    """

    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    q_obj = originalTextFor(Keyword('q{')+SkipTo(Keyword("}")))
    command = Group(OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print "Parsing Input Configuration..."
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception as exception:
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg, prefix='Progress',
                                 suffix='')
        return result
Example #13
    def parse_as_create_ai_table(self) -> dict:
        CREATE, AI, TABLE, VIEW, FROM, USING, AS = map(
            CaselessKeyword, "CREATE AI TABLE VIEW FROM USING AS".split())

        AI_TABLE = AI + TABLE

        word = Word(alphanums + "_")

        expr = (CREATE + (AI_TABLE | VIEW) + word('ai_table_name') + AS +
                originalTextFor(nestedExpr('(', ')'))('select'))

        r = expr.parseString(self._sql)
        r = r.asDict()

        if r['select'].startswith('(') and r['select'].endswith(')'):
            r['select'] = r['select'][1:-1]
        r['select'] = r['select'].strip(' \n')

        select = parse_sql(r['select'])

        if not isinstance(select.from_table, Join):
            raise Exception(
                "'from' must be like: 'from integration.table join predictor'")

        integration_name = select.from_table.left.parts[0]
        select.from_table.left.parts = select.from_table.left.parts[1:]
        integration_name_alias = select.from_table.left.alias.parts[0]

        predictor_name = select.from_table.right.parts[0]
        predictor_name_alias = select.from_table.right.alias.parts[0]
        select.from_table = select.from_table.left

        query_fields = []
        predictor_fields = []
        predictor_fields_targets = []

        integration_sql = str(select)

        for target in select.targets:
            if target.parts[0] == integration_name_alias:
                query_fields.append(target.parts[1])
                predictor_fields_targets.append(target)
            elif target.parts[0] == predictor_name_alias:
                predictor_fields.append(target.parts[1])
        select.targets = predictor_fields_targets

        res = {
            'ai_table_name': r['ai_table_name'],
            'integration_name': integration_name,
            'integration_query': integration_sql,
            'query_fields': query_fields,
            'predictor_name': predictor_name,
            'predictor_fields': predictor_fields
        }

        return res
Example #14
    def cut_from_tail(self, text):
        ''' Removes 'text' from end of sql. Not case sensitive.
        '''
        text_arr = text.split(' ')

        ending = CaselessKeyword(text_arr[0])
        for x in text_arr[1:]:
            ending = ending + CaselessKeyword(x)
        ending = ending + StringEnd()

        expr = (originalTextFor(SkipTo(ending)))('original') + (originalTextFor(ending))('ending')

        try:
            r = expr.parseString(self._sql)
        except ParseException:
            return False

        self._sql = r.asDict()['original'].strip()
        return True
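
A stand-alone sketch of the same idea, with the grammar rebuilt inline for text='for update' (the sample SQL is made up):

from pyparsing import CaselessKeyword, StringEnd, SkipTo, originalTextFor

ending = CaselessKeyword('for') + CaselessKeyword('update') + StringEnd()
expr = (originalTextFor(SkipTo(ending)))('original') + (originalTextFor(ending))('ending')
r = expr.parseString('SELECT * FROM t FOR UPDATE')
print(r['original'].strip())  # -> SELECT * FROM t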
Example #15
    def write(self, stream="/dev/stdout"):
      if isinstance(stream, str):
        with open(stream, 'w') as f:
          return self.write(f)

      sstream = io.StringIO()
      super(BbQuiz, self).write(sstream)

      text = sstream.getvalue()

      # replace $...$ with \math{...}
      text = pp.QuotedString(quoteChar='$',convertWhitespaceEscapes=False).setParseAction(lambda toks: r'\math{%s}'%toks[0]).transformString( text )

      # Replace macros.
      command = pp.Word(pp.alphas)
      options = pp.originalTextFor( pp.nestedExpr( '[', ']' ) )
      arguments = pp.originalTextFor( pp.nestedExpr( '{', '}' ) )

      macro = pp.Combine( pp.Literal("\\") + command("command") + pp.ZeroOrMore(options)("options") + pp.ZeroOrMore(arguments)("arguments") )
      macro.setParseAction( self.expand_macro )

      # transform string until all macros have been expanded
      while True:
        newtext = macro.transformString( text )
        if newtext == text:
          break
        text = newtext



      # try to catch some syntax errors that will cause Bb to choke

      # 1. MC or MA questions don't have a "correct" answer
      for line in text.split('\n'):
        if line.startswith('MC') or line.startswith('MA'):
          if not re.search("\tcorrect", line):
            print "WARNING: A multiple choice/answer question does not have a correct answer. Blackboard will not parse this."
            print "\t",line[3:50],'...'
            print

      stream.write( text )
Example #16
    def get_fragment_grammar():

        # Match header [mapping]
        header = Suppress("[") + Suppress("mapping") + Suppress("]")

        # There are three possible patterns for mapping entries:
        #       obj:symbol (scheme)
        #       obj (scheme)
        #       * (scheme)
        obj = Fragment.ENTITY.setResultsName("object")
        symbol = Suppress(":") + Fragment.IDENTIFIER.setResultsName("symbol")
        scheme = Suppress("(") + Fragment.IDENTIFIER.setResultsName(
            "scheme") + Suppress(")")

        pattern1 = Group(obj + symbol + scheme)
        pattern2 = Group(obj + scheme)
        pattern3 = Group(
            Literal(Mapping.MAPPING_ALL_OBJECTS).setResultsName("object") +
            scheme)

        mapping_entry = pattern1 | pattern2 | pattern3

        # To simplify parsing, classify groups of condition-mapping entry into two types: normal and default
        # A normal grouping is one with a non-default condition. The default grouping is one which contains the
        # default condition
        mapping_entries = Group(
            ZeroOrMore(mapping_entry)).setResultsName("mappings")

        normal_condition = Suppress(":") + originalTextFor(
            SDKConfig.get_expression_grammar())
        default_condition = Optional(
            Suppress(":") + Literal(Mapping.DEFAULT_CONDITION))

        normal_group = Group(
            normal_condition.setResultsName("condition") + mapping_entries)
        default_group = Group(default_condition +
                              mapping_entries).setResultsName("default_group")

        normal_groups = Group(
            ZeroOrMore(normal_group)).setResultsName("normal_groups")

        # Any mapping fragment definition can have zero or more normal group and only one default group as a last entry.
        archive = Suppress("archive") + Suppress(
            ":") + Fragment.ENTITY.setResultsName("archive")
        entries = Suppress("entries") + Suppress(":") + (
            normal_groups + default_group).setResultsName("entries")

        mapping = Group(header + archive + entries)

        mapping.setParseAction(lambda t: Mapping(t[0].archive, t[0].entries))

        mapping.ignore("#" + restOfLine)

        return mapping
Example #17
    def parse_element(cls, indent_stack):
        """Set the list of producers in the rule's ``producer`` attribute."""
        producer_body = (
            Word(alphanums + "_") + originalTextFor(nestedExpr()) +
            Suppress(',') +
            PYTHON_ALLOWED_EXPR).setParseAction(lambda toks: {
                'code': toks[0],
                'params': eval(toks[1]),
                'rule': eval(toks[2])
            })
        return (Keyword('producer:').suppress() + indentedBlock(
            OneOrMore(producer_body), indent_stack)).setResultsName('producer')
Example #18
    def bootstrap(config):
        """
        Loads unit lists for use in this instance of the measurement parser

        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        units = {}
        systems = {}
        prepositions = DataHandler().get_prepositions()

        directory = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(directory, "units/*.yaml")

        for file_path in glob.glob(path):
            unit_file = open(file_path, 'r')
            unit_type = yaml.safe_load(unit_file)
            for unit in unit_type['keywords']:
                units[unit] = unit_type['id']
            systems[unit_type['id']] = \
                (unit_type['system'], unit_type['type'])

        preposition_parser = \
            Or([CaselessLiteral(s) for s in prepositions]) + Word(alphas)

        measurement_parser = \
            originalTextFor(
                Word(nums) +
                ZeroOrMore(',' + Word(nums+',')) +
                ZeroOrMore('.' + Word(nums)) +
                ZeroOrMore(Word(nums) + '/' + Word(nums))
            ) + \
            Or([CaselessLiteral(s) for s in units.keys()]) + \
            Optional(originalTextFor(preposition_parser))

        registry.set('MP_units', units)
        registry.set('MP_systems', systems)
        registry.set('MP_preposition_parser', preposition_parser)
        registry.set('MP_measurement_parser', measurement_parser)
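
A stand-alone sketch of the number/unit grammar above, with made-up unit and preposition keywords in place of the YAML-driven ones:

from pyparsing import (Word, nums, alphas, ZeroOrMore, Optional, Or,
                       CaselessLiteral, originalTextFor)

units = ['miles', 'km']
prepositions = ['from', 'to']

preposition_parser = Or([CaselessLiteral(s) for s in prepositions]) + Word(alphas)
measurement_parser = (originalTextFor(
    Word(nums) +
    ZeroOrMore(',' + Word(nums + ',')) +
    ZeroOrMore('.' + Word(nums)) +
    ZeroOrMore(Word(nums) + '/' + Word(nums))) +
    Or([CaselessLiteral(s) for s in units]) +
    Optional(originalTextFor(preposition_parser)))

print(measurement_parser.parseString('1,500 miles from home').asList())
# -> ['1,500', 'miles', 'from home']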
Example #19
    def bootstrap(config):
        """
        Loads unit lists for use in this instance of the measurement parser

        :param config: cahoots config
        :type config: cahoots.config.BaseConfig
        """
        units = {}
        systems = {}
        prepositions = DataHandler().get_prepositions()

        directory = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(directory, "units/*.yaml")

        for file_path in glob.glob(path):
            unit_file = open(file_path, 'r')
            unit_type = yaml.safe_load(unit_file)
            for unit in unit_type['keywords']:
                units[unit] = unit_type['id']
            systems[unit_type['id']] = \
                (unit_type['system'], unit_type['type'])

        preposition_parser = \
            Or([CaselessLiteral(s) for s in prepositions]) + Word(alphas)

        measurement_parser = \
            originalTextFor(
                Word(nums, max=3) +
                ZeroOrMore(',' + Word(nums, exact=3)) +
                ZeroOrMore('.' + Word(nums)) +
                ZeroOrMore(Word(nums) + '/' + Word(nums))
            ) + \
            Or([CaselessLiteral(s) for s in units.keys()]) + \
            Optional(originalTextFor(preposition_parser))

        registry.set('MP_units', units)
        registry.set('MP_systems', systems)
        registry.set('MP_preposition_parser', preposition_parser)
        registry.set('MP_measurement_parser', measurement_parser)
Example #20
    def parse_element(cls, indent_stack):
        """Set the list of aggregations on the rule."""
        aggregation_body = (
            Word(alphanums + "_") + originalTextFor(nestedExpr()) +
            Suppress(',') +
            PYTHON_ALLOWED_EXPR).setParseAction(lambda toks: {
                'name': toks[0],
                'engine': eval(toks[1]),
                'rule': eval(toks[2])
            })
        return (Keyword('aggregation:').suppress() +
                indentedBlock(OneOrMore(aggregation_body),
                              indent_stack)).setResultsName('aggregation')
Example #21
    def parse_element(cls, indent_stack):
        """Set the list of producers in the rule's ``producer`` attribute."""
        producer_body = (Word(alphanums + "_") +
                         originalTextFor(nestedExpr()) +
                         Suppress(',') +
                         PYTHON_ALLOWED_EXPR
                         ).setParseAction(
            lambda toks: {'code': toks[0],
                          'params': eval(toks[1]),
                          'rule': eval(toks[2])})
        return (Keyword('producer:').suppress() +
                indentedBlock(OneOrMore(producer_body), indent_stack)
                ).setResultsName('producer')
Example #22
    def create_pre_timedelta_literal(tok):
        """Detects <number> <timescale> <preposition>"""
        delta = originalTextFor(Or([
            Word(nums) +
            ZeroOrMore(',' + Word(nums+',')) +
            ZeroOrMore('.' + Word(nums)),
            CaselessLiteral('an'),
            CaselessLiteral('a')
        ])) + CaselessLiteral(tok) + DateParser.get_preposition_literals()

        delta.setName('pre' + tok).\
            setParseAction(DateParser.generate_pre_timedelta)

        return delta
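
DateParser and the surrounding class are part of the host project; a quick stand-alone check of just the quantity matcher used above (samples are made up):

from pyparsing import Word, nums, ZeroOrMore, Or, CaselessLiteral, originalTextFor

quantity = originalTextFor(Or([
    Word(nums) + ZeroOrMore(',' + Word(nums + ',')) + ZeroOrMore('.' + Word(nums)),
    CaselessLiteral('an'),
    CaselessLiteral('a'),
]))
for sample in ('12,500.75', 'an hour'):
    print(quantity.parseString(sample)[0])  # -> 12,500.75, then an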
Example #23
    def create_pre_timedelta_literal(tok):
        """Detects <number> <timescale> <preposition>"""
        delta = originalTextFor(
            Or([
                Word(nums) + ZeroOrMore(',' + Word(nums + ',')) +
                ZeroOrMore('.' + Word(nums)),
                CaselessLiteral('an'),
                CaselessLiteral('a')
            ])) + CaselessLiteral(tok) + DateParser.get_preposition_literals()

        delta.setName('pre' + tok).\
            setParseAction(DateParser.generate_pre_timedelta)

        return delta
Example #24
    def _parser_piece_destination_and_title():
        """
        Return PyParsing element to match the destination and title of a
        markdown link.
        """

        # Capture everything between the balanced parentheses
        # Then parse it later.
        dest_and_title = originalTextFor(nestedExpr(
            opener="(",
            closer=")")).addParseAction(lambda s, l, toks: toks[0][1:-1])

        destination = Combine(
            # Zero or more non-space characters.
            # But before each character (exact=1) check if we have a
            # shortcode. If we do, allow that.
            ZeroOrMore(
                originalTextFor(nestedExpr(opener=R"{{<", closer=">}}"))
                | originalTextFor(nestedExpr(opener=R"{{%", closer="%}}"))
                | CharsNotIn(" \t", exact=1))).setResultsName("destination")

        # CommonMark requires link title to be encased in single-quotes,
        # double-quotes, or wrapped in parentheses. Let's not bother with
        # the parentheses case for now.
        title = (quotedString.copy().setResultsName("title").setParseAction(
            lambda s, l, toks: unescape_quoted_string(toks[0])))

        # This will parse the contents of dest_and_title
        dest_and_title_parser = destination + Optional(White(" ") +
                                                       title) + StringEnd()

        def back_parse_action(_s, _l, toks):
            return dest_and_title_parser.parseString(toks[0])

        dest_and_title.addParseAction(back_parse_action)

        return dest_and_title
Example #25
def substfile_bnf():
    """ substfile_bnf()
    Defines the parser grammar for each template block within the given substitutions file

    :return: PyParser BNF Expression
    """
    expr = Keyword('file') + originalTextFor(Word(alphanums+'_$()/') + '.template')\
           + l_brace\
           + pattern_bnf()\
           + OneOrMore(instance_bnf())\
           + r_brace
    # Defines comments
    expr.ignore('#' + restOfLine)

    return expr
Example #26
    def create_post_timedelta_literal(tok):
        """Detects <plus/minus> <number> <timescale>"""
        delta = Or([CaselessLiteral(t)
                    for t in ['+', '-', 'plus', 'minus']]) + originalTextFor(
                        Or([
                            Word(nums) + ZeroOrMore(',' + Word(nums + ',')) +
                            ZeroOrMore('.' + Word(nums)),
                            CaselessLiteral('an'),
                            CaselessLiteral('a')
                        ])) + CaselessLiteral(tok) + StringEnd()

        delta.setName('post' + tok).\
            setParseAction(DateParser.generate_post_timedelta)

        return delta
Example #27
    def parse_element(cls, indent_stack):
        """Set the list of aggregations on the rule."""
        aggregation_body = (
            Word(alphanums + "_") +
            originalTextFor(nestedExpr()) +
            Suppress(',') +
            PYTHON_ALLOWED_EXPR
        ).setParseAction(lambda toks: {
            'name': toks[0],
            'engine': eval(toks[1]),
            'rule': eval(toks[2])
        })
        return (Keyword('aggregation:').suppress() +
                indentedBlock(OneOrMore(aggregation_body), indent_stack)
                ).setResultsName('aggregation')
Example #28
    def create_post_timedelta_literal(tok):
        """Detects <plus/minus> <number> <timescale>"""
        delta = Or(
            [CaselessLiteral(t) for t in ['+', '-', 'plus', 'minus']]
        ) + originalTextFor(Or([
            Word(nums) +
            ZeroOrMore(',' + Word(nums+',')) +
            ZeroOrMore('.' + Word(nums)),
            CaselessLiteral('an'),
            CaselessLiteral('a')
        ])) + CaselessLiteral(tok) + StringEnd()

        delta.setName('post' + tok).\
            setParseAction(DateParser.generate_post_timedelta)

        return delta
Example #29
    def getToken(self):
        assert self.start is not None
        assert self.end is not None
        assert self.start_html is not None
        assert self.end_html is not None
        assert self.name is not None

        token = originalTextFor(
            nestedExpr(opener=self.start,
                       closer=self.end,
                       content=None,
                       ignoreExpr=self.ignore))
        token = token.setParseAction(
            self.convertToHTML(self.start_html, self.end_html))(self.name)

        return token
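
A stand-alone sketch of the token construction above, with made-up delimiters and a hard-coded replacement in place of self.convertToHTML():

from pyparsing import originalTextFor, nestedExpr

token = originalTextFor(nestedExpr(opener='[b]', closer='[/b]',
                                   content=None, ignoreExpr=None))
token.setParseAction(lambda toks: '<b>%s</b>' % toks[0][3:-4])
print(token.transformString('plain [b]bold[/b] plain'))
# -> plain <b>bold</b> plain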
Example #30
  def load(self, fh=None, ass=None):
    if ass is None:
      ass = Assignment()

    fh = super().get_fh(fh)

    text = fh.read()

    # look for a pandoc-style configuration section. this
    # will be a yaml file imbedded in the text between two sets of '---'.
    res = pyparsing.originalTextFor(pyparsing.QuotedString(quoteChar='---',multiline=True)).searchString( text )
    config = None
    if len(res):
      text = text.replace(res[0][0],"")
      config = yaml.load(res[0][0].strip("-"), Loader=yaml.FullLoader)

    in_data = json.loads(self._markdown_to_json(text))

    # need to process JSON before passing to the JSON reader
    qkey = "Questions"
    for k in in_data.keys():
      if k.lower() == "questions":
        qkey = k

    out_data = dict()
    out_data['questions'] = list()
    i = 0
    N = len(in_data.get(qkey,list()))
    while i < N:
      q = dict()
      q['text'] = in_data[qkey][i]
      if i+1 < N and isinstance( in_data[qkey][i+1], list ):
        i += 1
        q['answer'] = dict()
        q['answer']['choices'] = in_data[qkey][i]
      elif self.throw_on_missing_answers:
        raise RuntimeError("A question without an answer was found. Question text '{}'".format(q['text']))

      out_data['questions'].append(q)
      i += 1


    ass = JSON().load(io.StringIO(json.dumps(out_data)))
    if config is not None:
      ass.meta.__dict__.update( config )

    return ass
Example #31
def _get_value(wiki_markup, parse_actions=False):
    """Get value parser element.

    :param ParserElement wiki_markup: wiki markup

    value = wiki_markup;

    :returns: value parser element
    :rtype: ParserElement
    """
    value = pyparsing.Combine(
        pyparsing.OneOrMore(
            pyparsing.originalTextFor(wiki_markup))).setResultsName("value")
    value.setName("value")
    value.parseWithTabs()
    if parse_actions:
        pass
    return value
Example #32
def grammar(append_line_break=True):
    REF = Combine(Literal('"Ref"'))
    FN = Combine(Literal('"Fn::') + Word(alphanums) + Literal('"'))

    json_val = Forward()
    json_string = dblQuotedString
    json_list_items = delimitedList(json_val)
    json_list = Literal('[') + Optional(json_list_items) + Literal(']')
    json_dict_member = json_string + Literal(':') + json_val
    json_dict_members = delimitedList(json_dict_member)
    json_dict = Literal('{') + Optional(json_dict_members) + Literal('}')

    json_val << (json_string | json_list | json_dict)

    aws_member = (REF | FN) + Literal(':') + originalTextFor(json_val)
    aws = Group(Literal('{') +
                    aws_member +
                    Literal('}')).setParseAction(translate)
    aws_start = Literal('{').leaveWhitespace() + (REF | FN)
    term = aws_start | LineEnd()

    script_stuff = Group(
                            ZeroOrMore(White()) +
                            SkipTo(term)
                        ).setParseAction(translate)
    script_end = 'ScriptEnd' if append_line_break else 'ScriptEndNLB'
    script_line = script_stuff(script_end) + ~aws_start + Suppress(LineEnd())
    script_line_ending_with_aws = (
                                    Optional(script_stuff('Script')) +
                                    aws('AWS') +
                                    Suppress(LineEnd())
                                ).setParseAction(translate)
    aws_script_start = Optional(script_stuff('Script')) + aws('AWS')
    script_line_containing_aws = aws_script_start + script_line
    script_line_containing_many_aws = (
                                    OneOrMore(aws_script_start) +
                                    script_line)
    line = Group(
                    script_line_ending_with_aws('AWSEnd') |
                    script_line_containing_many_aws |
                    script_line
                ).setParseAction(process_line)

    return OneOrMore(line('Line'))
Example #33
    def get_fragment_grammar():

        # Match header [mapping]
        header = Suppress("[") + Suppress("mapping") + Suppress("]")

        # There are three possible patterns for mapping entries:
        #       obj:symbol (scheme)
        #       obj (scheme)
        #       * (scheme)
        obj = Fragment.ENTITY.setResultsName("object")
        symbol = Suppress(":") + Fragment.IDENTIFIER.setResultsName("symbol")
        scheme = Suppress("(") + Fragment.IDENTIFIER.setResultsName("scheme") + Suppress(")")

        pattern1 = Group(obj + symbol + scheme)
        pattern2 = Group(obj + scheme)
        pattern3 = Group(Literal(Mapping.MAPPING_ALL_OBJECTS).setResultsName("object") + scheme)

        mapping_entry = pattern1 | pattern2 | pattern3

        # To simplify parsing, classify groups of condition-mapping entry into two types: normal and default
        # A normal grouping is one with a non-default condition. The default grouping is one which contains the
        # default condition
        mapping_entries = Group(ZeroOrMore(mapping_entry)).setResultsName("mappings")

        normal_condition = Suppress(":") + originalTextFor(SDKConfig.get_expression_grammar())
        default_condition = Optional(Suppress(":") + Literal(Mapping.DEFAULT_CONDITION))

        normal_group = Group(normal_condition.setResultsName("condition") + mapping_entries)
        default_group = Group(default_condition + mapping_entries).setResultsName("default_group")

        normal_groups = Group(ZeroOrMore(normal_group)).setResultsName("normal_groups")

        # Any mapping fragment definition can have zero or more normal group and only one default group as a last entry.
        archive = Suppress("archive") + Suppress(":") + Fragment.ENTITY.setResultsName("archive")
        entries = Suppress("entries") + Suppress(":") + (normal_groups + default_group).setResultsName("entries")

        mapping = Group(header + archive + entries)

        mapping.setParseAction(lambda t: Mapping(t[0].archive, t[0].entries))

        mapping.ignore("#" + restOfLine)

        return mapping
Example #34
def parse_variadic_templates(txt):
    template_param_type = Word(alphas)
    template_variadic = Literal('...')
    template_id = Word(alphas)

    template_variadic_param = Group(
            template_param_type + template_variadic + template_id
            )

    template_param = Group( template_param_type + template_id )

    # template_params = Group ( delimitedList( template_variadic_param | Optional(template_param) ) )
    template_params = (
            Optional( OneOrMore(template_param + ',') )
            + template_variadic_param
            + Optional( OneOrMore( ',' + template_param ) )
            )

    template_params_no_variadic = (
            template_param + Optional( OneOrMore( ',' + template_param ) )
            )

    template_decl = Optional(
            "template"
            + Literal("<") + template_params_no_variadic +  Literal(">")
            ) + "template" + Literal("<") + template_params +  Literal(">")

    block_content = Forward()
    block = nestedExpr('{', '}', content=block_content) + Literal(';') * (0,1)
    block_content <<  ( CharsNotIn('{}') | block )

    decl = originalTextFor( template_decl + CharsNotIn('{') + block )


    template_file = Forward()
    code_block = decl | White() | Word(printables)
    template_file << ( Optional(OneOrMore(code_block)) | template_file)


    parsed = template_file.parseString( txt )

    return parsed
Example #35
    def create_pre_timedelta_literal(tok):
        """
        Detects <number> <timescale> <preposition>

        :param tok: the token we want to produce a detector for
        :type tok: str
        :return: the caseless literal
        :rtype: pyparsing.And
        """
        delta = originalTextFor(Or([
            Word(nums) +
            ZeroOrMore(',' + Word(nums+',')) +
            ZeroOrMore('.' + Word(nums)),
            CaselessLiteral('an'),
            CaselessLiteral('a')
        ])) + CaselessLiteral(tok) + DateParser.get_preposition_literals()

        delta.setName('pre' + tok).\
            setParseAction(DateParser.generate_pre_timedelta)

        return delta
Example #36
def parse_plus_string_to_list(plus_delimited_string):
    """
    utility function to convert '+' delimited, quoted string into list of
    substrings.

    >>> s1 = "a"
    >>> parse_plus_string_to_list( s1)
    ['a']
    >>> s1 = "a+b"
    >>> parse_plus_string_to_list( s1)
    ['a', 'b']
    """
    delimiter = '+'
    non_delimiter_chars = pyparsing.printables.replace(delimiter, '')
    OneOrMore = pyparsing.OneOrMore
    expression = pyparsing.originalTextFor(
        OneOrMore(pyparsing.quotedString
                  | pyparsing.Word(non_delimiter_chars)))
    expressions = pyparsing.delimitedList(expression, delimiter)
    parse_result = expressions.parseString(plus_delimited_string)
    # extract the identified tokens from the pyparsing object, as a list
    results = list(parse_result)
    return results
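
Worth noting: the quotedString alternative keeps a '+' inside quotes from being treated as a delimiter (made-up sample):

print(parse_plus_string_to_list('a+"b+c"+d'))  # -> ['a', '"b+c"', 'd']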
Example #37
    def create_post_timedelta_literal(tok):
        """
        Detects <plus/minus> <number> <timescale>

        :param tok: the token we want to produce a detector for
        :type tok: str
        :return: the caseless literal
        :rtype: pyparsing.Or
        """
        delta = Or(
            [CaselessLiteral(t) for t in ['+', '-', 'plus', 'minus']]
        ) + originalTextFor(Or([
            Word(nums) +
            ZeroOrMore(',' + Word(nums+',')) +
            ZeroOrMore('.' + Word(nums)),
            CaselessLiteral('an'),
            CaselessLiteral('a')
        ])) + CaselessLiteral(tok) + StringEnd()

        delta.setName('post' + tok).\
            setParseAction(DateParser.generate_post_timedelta)

        return delta
Example #38
def formatList(string: Optional[str],
               replaceSemicolons=True,
               replaceAnds=True) -> List[str]:
    """Parses a list.

    Guarantees that each element of the list is non-null and non-empty. Gracefully supports quoting: does
    not split items that are quoted (single or double quotes).

    Args:
    - string (Optional[str]): String to convert into a list, using commas as separators
    - replaceSemicolons (bool, optional): Also use semicolons as separators. Defaults to True.
    - replaceAnds (bool, optional): Also uses "and" as separators. Defaults to True.

    Returns:
    - List[str]: List of strings
    """
    if string is None:
        return []

    value = string.strip()
    if replaceSemicolons:
        value = value.replace(";", ", ")
    if replaceAnds:
        value = value.replace(" et ", ", ").replace(" and ", ", ")

    quotedstring = pyparsing.quotedString.copy()
    quotedstring.addParseAction(pyparsing.removeQuotes)
    element = pyparsing.originalTextFor(
        pyparsing.ZeroOrMore(
            pyparsing.Word(pyparsing.printables + pyparsing.alphas8bit,
                           excludeChars="(),")
            | pyparsing.nestedExpr()))

    expr = pyparsing.delimitedList(quotedstring | element)
    parsed = expr.parseString(value, parseAll=True)

    return [x for x in parsed.asList() if x]
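
Example calls, with made-up input; formatList() above is assumed to be importable:

print(formatList('John Doe; Jane Roe and "Smith, Alice"'))
# -> ['John Doe', 'Jane Roe', 'Smith, Alice']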
Example #39
def parse_variadic_templates(txt):
    template_param_type = Word(alphas)
    template_variadic = Literal('...')
    template_id = Word(alphas)

    template_variadic_param = Group(template_param_type + template_variadic +
                                    template_id)

    template_param = Group(template_param_type + template_id)

    # template_params = Group ( delimitedList( template_variadic_param | Optional(template_param) ) )
    template_params = (Optional(OneOrMore(template_param + ',')) +
                       template_variadic_param +
                       Optional(OneOrMore(',' + template_param)))

    template_params_no_variadic = (template_param +
                                   Optional(OneOrMore(',' + template_param)))

    template_decl = Optional("template" + Literal("<") +
                             template_params_no_variadic +
                             Literal(">")) + "template" + Literal(
                                 "<") + template_params + Literal(">")

    block_content = Forward()
    block = nestedExpr('{', '}', content=block_content) + Literal(';') * (0, 1)
    block_content << (CharsNotIn('{}') | block)

    decl = originalTextFor(template_decl + CharsNotIn('{') + block)

    template_file = Forward()
    code_block = decl | White() | Word(printables)
    template_file << (Optional(OneOrMore(code_block)) | template_file)

    parsed = template_file.parseString(txt)

    return parsed
Example #40
def parser_factory(styler):
    """Builds the repr() parser."""
    squo = styler('class:string', "'")
    dquo = styler('class:string', '"')

    esc_single = pp.oneOf(r'\\ \' \" \n \r \t')
    esc_hex = pp.Literal(r'\x') + pp.Word(pp.hexnums, exact=2)
    escs = styler('class:escape', esc_single | esc_hex)

    control_chars = ''.join(map(chr, range(32))) + '\x7f'
    normal_chars_squo = pp.CharsNotIn(control_chars + r"\'")
    chars_squo = styler('class:string', normal_chars_squo) | escs
    normal_chars_dquo = pp.CharsNotIn(control_chars + r'\"')
    chars_dquo = styler('class:string', normal_chars_dquo) | escs

    skip_white = pp.Optional(pp.White())
    bytes_prefix = pp.Optional(styler('class:string_prefix', 'b'))
    string_squo = skip_white + bytes_prefix + squo - pp.ZeroOrMore(
        chars_squo) + squo
    string_dquo = skip_white + bytes_prefix + dquo - pp.ZeroOrMore(
        chars_dquo) + dquo
    string = string_squo | string_dquo
    string.leaveWhitespace()

    address = styler('class:address', '0x' + pp.Word(pp.hexnums))
    number = styler('class:number', ppc.number)
    const = pp.oneOf('True False None NotImplemented Ellipsis ...')
    const = styler('class:constant', const)
    kwarg = styler('class:kwarg', ppc.identifier) + styler(
        'class:operator', '=')
    call = styler('class:call', ppc.identifier) + pp.FollowedBy('(')
    magic = styler('class:magic', pp.Regex(r'__[a-zA-Z0-9_]+__'))

    token = string | address | number | const | kwarg | call | magic
    token.parseWithTabs()
    return pp.originalTextFor(token)
Example #41
    def parse_element(cls, indent_stack):
        """Set the ``only_if_master_value`` attribute on the rule."""
        return (Keyword("@only_if_master_value").suppress() +
                originalTextFor(nestedExpr())
                ).setResultsName("only_if_master_value").setParseAction(
                    lambda toks: toks[0])
Example #42
    def __init__(self, codeBlock, codeIndex, msg):
        ParserException.__init__(self, codeBlock.xmlElement, msg)
        
        self.columnNumber = col(codeIndex, codeBlock.codeString)
        self.lineNumber = codeBlock.scriptLineNumber + lineno(codeIndex, codeBlock.codeString)-1
    


identifier = Word(alphas + '_', alphanums + '_')
numericConstant = Regex(r'\b((0(x|X)[0-9a-fA-F]*)|(([0-9]+\.?[0-9]*)|(\.[0-9]+))((e|E)(\+|-)?[0-9]+)?)(L|l|UL|ul|u|U|F|f|ll|LL|ull|ULL)?\b')

ignoreExpr = cppStyleComment.copy() | quotedString.copy()

baseExpr = Forward()

arrayAccess = originalTextFor(nestedExpr('[', ']', baseExpr, ignoreExpr))
parenthisedExpression = originalTextFor(nestedExpr('(', ')', baseExpr, ignoreExpr))
functionCall = nestedExpr('(', ')', delimitedList(baseExpr), ignoreExpr)
alphaNumPlusSafePunctuation = alphanums + '!#$%&\\*+-./:;<=>@^_`{|}~'

baseExpr << OneOrMore(originalTextFor(identifier + functionCall) | quotedString.copy() \
                | identifier | numericConstant | arrayAccess | parenthisedExpression \
                | Word(alphaNumPlusSafePunctuation))
baseExpr.ignore(cppStyleComment.copy())


def targetComponentsForOperatorsInString(operatorNames, codeBlock):
    """
    Return a list of pairs of operator names and their targets that are in `codeString`.
    The valid operator names searched for are `operatorNames`. For example, if 'L' is in `operatorNames`,
    then in the code ``L[phi]`` the return value would be ``('L', 'phi', slice(firstCharacterIndex, lastCharacterIndex))``.
Example #43
def _create_config_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases ":" INDENT body UNDENT) | include
    include ::= "include(" PATH ")"
    body ::=  [inherit_from] (creator | derived | calculated) [checker] [documentation]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]
    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [parse_first] [legacy] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [parse_first] [depends_on] [only_if] [do_not_cache] "," python_allowed_exp


    persistent_identifier ::= @persistent_identifier( level )
    inherit_from ::= "@inherit_from()"
    legacy ::= "@legacy(" correspondences+ ")"
    do_not_cache ::= "@do_not_cache"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring
    """

    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)

    json_id = (Word(alphanums + "_") + Optional(oneOf("[0] [n]")))\
              .setResultsName("json_id", listAllMatches=True)\
              .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
                            .setParseAction(lambda tokens: "".join(tokens)))\
              .setResultsName("aliases")
    python_allowed_expr = Forward()
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access ^ list_access ^ function_call)\
                          .setResultsName("value", listAllMatches=True)

    persistent_identifier = (Suppress("@persistent_identifier") +  nestedExpr("(", ")"))\
                            .setResultsName("persistent_identifier")
    inherit_from = (Suppress("@inherit_from") + originalTextFor(nestedExpr("(", ")")))\
                    .setResultsName("inherit_from")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
             .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
              .setResultsName("only_if")
    depends_on = (Suppress("@depends_on") + originalTextFor(nestedExpr("(", ")")))\
                 .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + originalTextFor(nestedExpr("(", ")")))\
                  .setResultsName("parse_first")
    do_not_cache = (Suppress("@") + "do_not_cache")\
                   .setResultsName("do_not_cache")
    master_format = (Suppress("@master_format") + originalTextFor(nestedExpr("(", ")")))\
                    .setResultsName("master_format")

    derived_calculated_body = Optional(parse_first) + Optional(depends_on) + Optional(only_if) + Optional(do_not_cache) + python_allowed_expr

    derived = "derived" + Suppress(":") + INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString\
                .setParseAction(removeQuotes)\
                .setResultsName("source_tag", listAllMatches=True)
    source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\
                    .setResultsName("source_format", listAllMatches=True)
    creator_body = (Optional(parse_first) + Optional(depends_on) + Optional(only_if) +  Optional(legacy) + source_format + Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\
                                            .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) + UNDENT

    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr('(', ')')))\
                       .setResultsName("checker_function", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT)

    doc_string = QuotedString(quoteChar='"""', multiline=True) | quotedString.setParseAction(removeQuotes)
    subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.') + Suppress(":") + Optional(doc_string))\
                 .setResultsName("subfields", listAllMatches=True)
    documentation = ("documentation" + Suppress(":") + INDENT + Optional(doc_string).setResultsName("main_doc") + ZeroOrMore(subfield) + UNDENT)\
                     .setResultsName("documentation")

    field_def = (creator | derived | calculated)\
                .setResultsName("type_field", listAllMatches=True)

    body = Optional(inherit_from) + Optional(field_def) + Optional(checker) + Optional(documentation)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
              .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + json_id + Optional(Suppress(",") + aliases) + Suppress(":") + INDENT + body + UNDENT)\
           .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
Example #44
cated_expr = arith_expr + ZeroOrMore('//' + arith_expr)
comp_op << oneOf('< > == >= <= /= .lt. .gt. .eq. .ge. .le. .ne.')
comparison = arith_expr + ZeroOrMore(comp_op + arith_expr)
not_test = Forward()
not_test << (('.not.' + not_test) | comparison)
and_test = not_test + ZeroOrMore('.and.' + not_test)
or_test = and_test + ZeroOrMore('.or.' + and_test)
logical_eq_test = or_test + ZeroOrMore(oneOf('.eqv. .neqv.') + or_test)
user_dyadic_test = logical_eq_test + ZeroOrMore(user_op + logical_eq_test)
test = user_dyadic_test

calllist << ('(' + Optional(delimitedList(test, delim=',')) + ')')
array_literal = '(/' + delimitedList(test, delim=',') + '/)'
atom << (('(' + test + ')') | array_literal | NAME | INTEGER_K | REAL | STRING | oneOf('.true. .false.'))

orig_test = originalTextFor(test)
orig_test.addParseAction(lambda s,loc,toks: [toks[0].strip()])


funcall = Group(NAME + '(' + delimitedList(test, delim=',') + ')')

comma = Literal(',').setParseAction(lambda s,loc,toks: [', '])

do_kwd = p.Keyword('do').setParseAction(lambda s,loc,toks: ['do '])
ivar = NAME.setResultsName('ivar')
istart = orig_test.setResultsName('istart')
eqsign = p.Literal('=').setParseAction(lambda s,loc,toks: [' = '])
comma = p.Literal(',').setParseAction(lambda s,loc,toks: [', '])
iend = orig_test.setResultsName('iend')
istep = orig_test.setResultsName('istep')
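
# For illustration, the loop-header fragments above could be assembled into a
# complete do-loop header. This composition (and the expected tokens in the
# comment) is a sketch, not part of the original source:
do_header = do_kwd + ivar + eqsign + istart + comma + iend + \
            p.Optional(comma + istep)
# do_header.parseString('do i = 1, n')  ->  ['do ', 'i', ' = ', '1', ', ', 'n']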
Example No. 45
0
def _create_field_parser():
    """
    Creates a parser using pyparsing that works with bibfield rule definitions

    BNF like grammar:

    rule ::= ([persistent_identifier] json_id ["[0]" | "[n]"] "," aliases ":" INDENT body UNDENT) | include | python_comment
    include ::= "include(" PATH ")"
    body ::= [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer]
    aliases ::= json_id ["[0]" | "[n]"] ["," aliases]

    creator ::= "creator:" INDENT creator_body+ UNDENT
    creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr
    source_format ::= MASTER_FORMATS
    source_tag ::= QUOTED_STRING

    derived ::= "derived" INDENT derived_calculated_body UNDENT
    calculated ::= "calculated:" INDENT derived_calculated_body UNDENT
    derived_calculated_body ::= [decorators] "," python_allowed_exp

    decorators ::= (peristent_identfier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)*
    peristent_identfier ::= @persitent_identifier( level )
    legacy ::= "@legacy(" correspondences+ ")"
    correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")"
    parse_first ::= "@parse_first(" jsonid+ ")"
    depends_on ::= "@depends_on(" json_id+ ")"
    only_if ::= "@only_if(" python_condition+ ")"
    only_if_master_value ::= "@only_if_master_value(" python_condition+  ")"

    inherit_from ::= "@inherit_from()"

    python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call

    checker ::= "checker:" INDENT checker_function+ UNDENT

    documentation ::= INDENT doc_string subfield* UNDENT
    doc_string ::= QUOTED_STRING
    subfield ::= "@subfield" json_id["."json_id*] ":" docstring

    producer ::= "producer:" INDENT producer_body UNDENT
    producer_body ::= producer_code "," python_dictionary
    producer_code ::= ident
    """

    indent_stack = [1]

    def check_sub_indent(str, location, tokens):
        cur_col = col(location, str)
        if cur_col > indent_stack[-1]:
            indent_stack.append(cur_col)
        else:
            raise ParseException(str, location, "not a subentry")

    def check_unindent(str, location, tokens):
        if location >= len(str):
            return
        cur_col = col(location, str)
        if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]):
            raise ParseException(str, location, "not an unindent")

    def do_unindent():
        indent_stack.pop()

    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(check_unindent)
    UNDENT.setParseAction(do_unindent)
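
    # Note on the indent machinery above: INDENT consumes the line break and
    # fires check_sub_indent, which pushes the new (deeper) column onto
    # indent_stack; UNDENT pops it via do_unindent. Since setParseAction
    # *replaces* any previously attached action, only do_unindent actually
    # runs on UNDENT; check_unindent would need addParseAction to also fire.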

    json_id = (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))\
              .setResultsName("json_id", listAllMatches=True)\
              .setParseAction(lambda tokens: "".join(tokens))
    aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]")))
                            .setParseAction(lambda tokens: "".join(tokens)))\
              .setResultsName("aliases")
    ident = Word(alphas + "_", alphanums + "_")
    dict_def = originalTextFor(nestedExpr('{', '}'))
    list_def = originalTextFor(nestedExpr('[', ']'))
    dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']'))
    function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')'))

    python_allowed_expr = (dict_def ^ list_def ^ dict_access ^ \
            list_access ^ function_call ^ restOfLine)\
            .setResultsName("value", listAllMatches=True)

    persistent_identifier = (Suppress("@persistent_identifier") + \
            nestedExpr("(", ")"))\
            .setResultsName("persistent_identifier")
    legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("legacy", listAllMatches=True)
    only_if = (Suppress("@only_if") + originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("only_if")
    only_if_master_value = (Suppress("@only_if_value") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("only_if_master_value")
    depends_on = (Suppress("@depends_on") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("depends_on")
    parse_first = (Suppress("@parse_first") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("parse_first")
    memoize = (Suppress("@memoize") + nestedExpr("(", ")"))\
            .setResultsName("memoize")
    field_decorator = parse_first ^ depends_on ^ only_if ^ \
            only_if_master_value ^ memoize ^ legacy

    #Independent decorators
    inherit_from = (Suppress("@inherit_from") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("inherit_from")
    override = (Suppress("@") + "override")\
            .setResultsName("override")
    extend = (Suppress("@") + "extend")\
            .setResultsName("extend")
    master_format = (Suppress("@master_format") + \
            originalTextFor(nestedExpr("(", ")")))\
            .setResultsName("master_format") \
            .setParseAction(lambda toks: toks[0])

    derived_calculated_body = (ZeroOrMore(field_decorator) + python_allowed_expr)\
            .setResultsName('derived_calculated_def')

    derived = "derived" + Suppress(":") + \
            INDENT + derived_calculated_body + UNDENT
    calculated = "calculated" + Suppress(":") + \
            INDENT + derived_calculated_body + UNDENT

    source_tag = quotedString\
            .setParseAction(removeQuotes)\
            .setResultsName("source_tag", listAllMatches=True)
    source_format = Word(alphas, alphanums + "_")\
                    .setResultsName("source_format", listAllMatches=True)
    creator_body = (ZeroOrMore(field_decorator) + source_format + \
            Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\
            .setResultsName("creator_def", listAllMatches=True)
    creator = "creator" + Suppress(":") + \
            INDENT + OneOrMore(creator_body) + UNDENT
    field_def = (creator | derived | calculated)\
                .setResultsName("type_field", listAllMatches=True)

    #JsonExtra
    json_dumps = (Suppress('dumps') + Suppress(',') + python_allowed_expr)\
        .setResultsName("dumps")\
        .setParseAction(lambda toks: toks.value[0])
    json_loads = (Suppress("loads") + Suppress(",") + python_allowed_expr)\
        .setResultsName("loads")\
        .setParseAction(lambda toks: toks.value[0])

    json_extra = (Suppress('json:') + \
            INDENT + Each((json_dumps, json_loads)) + UNDENT)\
            .setResultsName('json_ext')

    #Checker
    checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr('(', ')')))\
                       .setResultsName("checker", listAllMatches=True)
    checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT)

    #Description/Documentation
    doc_double = QuotedString(quoteChar='"""', multiline=True)
    doc_single = QuotedString(quoteChar="'''", multiline=True)
    doc_string = INDENT + (doc_double | doc_single) + UNDENT
    description_body = (Suppress('description:') + doc_string).\
                setParseAction(lambda toks: toks[0][0])
    description = (description_body | doc_double | doc_single)\
            .setResultsName('description')

    #Producer
    producer_code = (Word(alphas, alphanums + "_")\
           + originalTextFor(nestedExpr("(", ")")))\
           .setResultsName('producer_code', listAllMatches=True)
    producer_body = (producer_code + Suppress(",") + python_allowed_expr)\
                    .setResultsName("producer_rule", listAllMatches=True)
    producer = Suppress("producer:") + INDENT + OneOrMore(producer_body) + UNDENT

    schema = (Suppress('schema:') + INDENT + dict_def + UNDENT)\
            .setParseAction(lambda toks: toks[0])\
            .setResultsName('schema')

    body = Optional(field_def) & Optional(checker) & Optional(json_extra) \
            & Optional(description) & Optional(producer) & Optional(schema)
    comment = Literal("#") + restOfLine + LineEnd()
    include = (Suppress("include") + quotedString)\
              .setResultsName("includes", listAllMatches=True)
    rule = (Optional(persistent_identifier) + Optional(inherit_from) + \
            Optional(override) + Optional(extend) +json_id + \
            Optional(Suppress(",") + aliases) + Suppress(":") + \
            INDENT + body + UNDENT)\
           .setResultsName("rules", listAllMatches=True)

    return OneOrMore(rule | include | comment.suppress())
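
# A rough usage sketch (the rule text is hypothetical, shaped after the BNF in
# the docstring; parsing is indentation-sensitive, so real configuration files
# are the authoritative input):
#
#   parser = _create_field_parser()
#   sample = 'title:\n    creator:\n        marc, "245__", value[\'a\']\n'
#   rules = parser.parseString(sample)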
Example No. 46
0
date = Combine(integer + '/' + integer + '/' + integer)

# Define the line definitions
gender_line = gender("sex") + NL
dob_line = date("DOB") + NL
name_line = restOfLine("name") + NL
id_line = Word(alphanums + '-')("ID") + NL
recnum_line = integer("recnum") + NL

# Define forms of address lines
first_addr_line = Suppress('.') + empty + restOfLine + NL
# Subsequent address line is not gender
subsq_addr_line = ~(gender_line) + restOfLine + NL

# a line with a name and a recnum combined, if no ID
name_recnum_line = originalTextFor(OneOrMore(Word(alphas + ',')))("name") + \
    integer("recnum") + NL

# Defining the form of an overall record, either with or without an ID
record = Group((first_addr_line + ZeroOrMore(subsq_addr_line))("address") + \
    gender_line + dob_line + ((name_line + id_line + recnum_line) | \
    name_recnum_line))

# Parse Data
records = OneOrMore(record).parseString(data)

# output the desired results (note that address is actually a list of lines)

for rec in records:
    if rec.ID:
        fout.write("%(name)s, %(ID)s, %(address)s, %(sex)s, %(DOB)s\n" % rec)
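
# The record layout implied by the grammar (hypothetical sample; `gender` and
# `integer` are defined in the elided part of this example):
#
#   .123 Main St.
#   Anytown, ST 00000
#   M
#   01/02/1970
#   Smith, John
#   ID-1234
#   42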
Example No. 47
0
           'it', 'itself', 'its', 'one', 'oneself',
           'they', 'them', 'themself', 'themselves', 'theirs', 'their']
ARTICLES = ['the', 'a', 'an']
NUMBERS = ["zero", "oh", "zip", "zilch", "nada", "bupkis", "one", "two",
           "three", "four", "five", "six", "seven", "eight", "nine", "ten",
           "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
           "seventeen", "eighteen", "nineteen", "ten", "twenty", "thirty",
           "forty", "fifty", "sixty", "seventy", "eighty", "ninety", "thousand",
           "million", "billion", "trillion", "quadrillion", "quintillion"]
COPULA = ["be", "am", "is", "are", "being", "was", "were", "been"]

CLOSED_WORD = set(PREPOSITIONS) | set(CONJUNTIONS) | set(PRONONS) | \
              set(ARTICLES) | set(NUMBERS) | set(COPULA)

bibtexChars = alphas + nums + "\\\\.-':,"
bracedWord = originalTextFor(nestedExpr("{", "}"))
bracedWord.addParseAction(removeQuotes)
WORDS = Word(bibtexChars) | bracedWord('braced')

def caps(text):
    """
    Capitalizes the first letter of the text and keeps the rest of the text
    intact.
    """
    if text:
        return text[0].upper() + text[1:]
    return ''
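
# e.g. caps("hello world") -> "Hello world"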

def abbrev(text, sep=' '):
    """
    Abbreviates a text.
Example No. 48
0
import six

from pyparsing import ParseException, ParserElement, FollowedBy, Suppress, \
    OneOrMore, Word, LineEnd, ZeroOrMore, Optional, Literal, alphas, \
    alphanums, originalTextFor, nestedExpr, quotedString, removeQuotes, \
    lineEnd, empty, col, restOfLine, delimitedList, Each, Keyword, \
    commaSeparatedList, Group

from .errors import FieldParserException, ModelParserException

ParserElement.defaultWhitespaceChars = (' \r\t')

COMMENT = (Literal("#") + restOfLine + LineEnd()).suppress()

IDENT = Word(alphanums + '_')
DICT_DEF = originalTextFor(nestedExpr('{', '}'))
LIST_DEF = originalTextFor(nestedExpr('[', ']'))
DICT_ACCESS = LIST_ACCESS = originalTextFor(IDENT + nestedExpr('[', ']'))

PYTHON_ALLOWED_EXPR = (DICT_DEF ^ LIST_DEF ^ DICT_ACCESS ^ LIST_ACCESS
                       ^ restOfLine).setParseAction(lambda toks: toks[0])
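
# e.g. PYTHON_ALLOWED_EXPR.parseString("{'a': 1}")[0] -> "{'a': 1}"
# (the '^' Or keeps the longest alternative; ties go to the leftmost one)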


def indentedBlock(expr, indent_stack, indent=True):
    """Define space-delimited indentation blocks.

    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    There is also a version in pyparsing, but it doesn't seem to work well
    with JSONAlchemy cfg files.
Example No. 49
0
from pyparsing_utils import LemmatizedWord
import wordlists


in_parens = Regex(r"\([^)]+\)")


modifier = Or(LemmatizedWord(w) for w in wordlists.food_adjectives if w) | in_parens | Keyword("to taste")


base_ingredient = Regex(r"[^-(),][^ (),]+") + SkipTo(
    Keyword("to taste") | Literal(",") | Word("-") | in_parens | LineEnd()
)


unit = Optional(in_parens) + Or(LemmatizedWord(w) for w in wordlists.units_of_measure if w)


quantity = OneOrMore(Word(nums + "-/"))


ingredient_line = (
    originalTextFor(Optional(quantity)).setResultsName("quantity")
    + originalTextFor(Optional(unit)).setResultsName("unit")
    + originalTextFor(ZeroOrMore(modifier + Optional(","))).setResultsName("pre_modifiers")
    + originalTextFor(base_ingredient).setResultsName("base_ingredient")
    + Optional(",")
    + Optional("-")
    + originalTextFor(SkipTo(LineEnd(), True)).setResultsName("post_modifiers")
)
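
# A hedged usage sketch; whether 'cups' counts as a unit depends entirely on
# the contents of wordlists.units_of_measure:
#
#   r = ingredient_line.parseString("2 cups flour, sifted")
#   r.quantity        -> '2'
#   r.unit            -> 'cups'
#   r.base_ingredient -> 'flour'
#   r.post_modifiers  -> 'sifted'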
Example No. 50
0
    def __init__(self, fragment_file, sdkconfig):
        try:
            fragment_file = open(fragment_file, "r")
        except TypeError:
            pass

        path = os.path.realpath(fragment_file.name)

        indent_stack = [1]

        class parse_ctx:
            fragment = None  # current fragment
            key = ""  # current key
            keys = list()  # list of keys parsed
            key_grammar = None  # current key grammar

            @staticmethod
            def reset():
                parse_ctx.fragment_instance = None
                parse_ctx.key = ""
                parse_ctx.keys = list()
                parse_ctx.key_grammar = None

        def fragment_type_parse_action(toks):
            parse_ctx.reset()
            parse_ctx.fragment = FRAGMENT_TYPES[
                toks[0]]()  # create instance of the fragment
            return None

        def expand_conditionals(toks, stmts):
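            # Recursively walk the parse results: plain values are collected
            # into stmts, while conditional groups are evaluated against
            # sdkconfig and only the first true branch (or the bare else
            # branch, reached through the IndexError fallback) is expanded.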
            try:
                stmt = toks["value"]
                stmts.append(stmt)
            except KeyError:
                try:
                    conditions = toks["conditional"]
                    for condition in conditions:
                        try:
                            _toks = condition[1]
                            _cond = condition[0]
                            if sdkconfig.evaluate_expression(_cond):
                                expand_conditionals(_toks, stmts)
                                break
                        except IndexError:
                            expand_conditionals(condition[0], stmts)
                except KeyError:
                    for tok in toks:
                        expand_conditionals(tok, stmts)

        def key_body_parsed(pstr, loc, toks):
            stmts = list()
            expand_conditionals(toks, stmts)

            if parse_ctx.key_grammar.min and len(
                    stmts) < parse_ctx.key_grammar.min:
                raise ParseFatalException(
                    pstr, loc,
                    "fragment requires at least %d values for key '%s'" %
                    (parse_ctx.key_grammar.min, parse_ctx.key))

            if parse_ctx.key_grammar.max and len(
                    stmts) > parse_ctx.key_grammar.max:
                raise ParseFatalException(
                    pstr, loc,
                    "fragment requires at most %d values for key '%s'" %
                    (parse_ctx.key_grammar.max, parse_ctx.key))

            try:
                parse_ctx.fragment.set_key_value(parse_ctx.key, stmts)
            except Exception as e:
                raise ParseFatalException(
                    pstr, loc,
                    "unable to add key '%s'; %s" % (parse_ctx.key, e.message))
            return None

        key = Word(alphanums + "_") + Suppress(":")
        key_stmt = Forward()

        condition_block = indentedBlock(key_stmt, indent_stack)
        key_stmts = OneOrMore(condition_block)
        key_body = Suppress(key) + key_stmts
        key_body.setParseAction(key_body_parsed)

        condition = originalTextFor(
            SDKConfig.get_expression_grammar()).setResultsName("condition")
        if_condition = Group(
            Suppress("if") + condition + Suppress(":") + condition_block)
        elif_condition = Group(
            Suppress("elif") + condition + Suppress(":") + condition_block)
        else_condition = Group(
            Suppress("else") + Suppress(":") + condition_block)
        conditional = (if_condition + Optional(OneOrMore(elif_condition)) +
                       Optional(else_condition)).setResultsName("conditional")

        def key_parse_action(pstr, loc, toks):
            key = toks[0]

            if key in parse_ctx.keys:
                raise ParseFatalException(
                    pstr, loc,
                    "duplicate key '%s' value definition" % parse_ctx.key)

            parse_ctx.key = key
            parse_ctx.keys.append(key)

            try:
                parse_ctx.key_grammar = parse_ctx.fragment.get_key_grammars(
                )[key]
                key_grammar = parse_ctx.key_grammar.grammar
            except KeyError:
                raise ParseFatalException(
                    pstr, loc, "key '%s' is not supported by fragment" % key)
            except Exception as e:
                raise ParseFatalException(
                    pstr, loc,
                    "unable to parse key '%s'; %s" % (key, e.message))

            key_stmt << (conditional
                         | Group(key_grammar).setResultsName("value"))

            return None

        def name_parse_action(pstr, loc, toks):
            parse_ctx.fragment.name = toks[0]

        key.setParseAction(key_parse_action)

        ftype = Word(alphas).setParseAction(fragment_type_parse_action)
        fid = Suppress(":") + Word(alphanums + "_.").setResultsName("name")
        fid.setParseAction(name_parse_action)
        header = Suppress("[") + ftype + fid + Suppress("]")

        def fragment_parse_action(pstr, loc, toks):
            key_grammars = parse_ctx.fragment.get_key_grammars()
            required_keys = set(
                [k for (k, v) in key_grammars.items() if v.required])
            present_keys = required_keys.intersection(set(parse_ctx.keys))
            if present_keys != required_keys:
                raise ParseFatalException(
                    pstr, loc, "required keys %s for fragment not found" %
                    list(required_keys - present_keys))
            return parse_ctx.fragment

        fragment_stmt = Forward()
        fragment_block = indentedBlock(fragment_stmt, indent_stack)

        fragment_if_condition = Group(
            Suppress("if") + condition + Suppress(":") + fragment_block)
        fragment_elif_condition = Group(
            Suppress("elif") + condition + Suppress(":") + fragment_block)
        fragment_else_condition = Group(
            Suppress("else") + Suppress(":") + fragment_block)
        fragment_conditional = (
            fragment_if_condition +
            Optional(OneOrMore(fragment_elif_condition)) +
            Optional(fragment_else_condition)).setResultsName("conditional")

        fragment = (header +
                    OneOrMore(indentedBlock(key_body, indent_stack,
                                            False))).setResultsName("value")
        fragment.setParseAction(fragment_parse_action)
        fragment.ignore("#" + restOfLine)

        deprecated_mapping = DeprecatedMapping.get_fragment_grammar(
            sdkconfig, fragment_file.name).setResultsName("value")

        fragment_stmt << (Group(deprecated_mapping) | Group(fragment)
                          | Group(fragment_conditional))

        def fragment_stmt_parsed(pstr, loc, toks):
            stmts = list()
            expand_conditionals(toks, stmts)
            return stmts

        parser = ZeroOrMore(fragment_stmt)
        parser.setParseAction(fragment_stmt_parsed)

        self.fragments = parser.parseFile(fragment_file, parseAll=True)

        for fragment in self.fragments:
            fragment.path = path
Example No. 51
0
File: io.py  Project: mrihtar/orange
from Orange.core import \
     BasketFeeder, FileExampleGenerator, BasketExampleGenerator, \
     C45ExampleGenerator, TabDelimExampleGenerator, \
     registerFileType as register_file_type

import Orange.feature as variable
from Orange.feature import Descriptor
MakeStatus = Orange.feature.Descriptor.MakeStatus
make = Orange.feature.Descriptor.make

from pyparsing import (printables, originalTextFor, OneOrMore, 
     quotedString, Word, delimitedList)

# unquoted words can contain anything but a comma
printables_no_comma = printables.replace(',', '')
content = originalTextFor(OneOrMore(quotedString | Word(printables_no_comma)))
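# e.g. content.parseString('a "b, c" d')[0] -> 'a "b, c" d'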

def loadARFF(filename, create_on_new=MakeStatus.Incompatible, **kwargs):
    """Return class:`Orange.data.Table` containing data from file in Weka ARFF format
       if there exists no .xml file with the same name. If it does, a multi-label
       dataset is read and returned.
    """
    if filename[-5:] == ".arff":
        filename = filename[:-5]
    if os.path.exists(filename + ".xml") and os.path.exists(filename + ".arff"):
        xml_name = filename + ".xml"
        arff_name = filename + ".arff"
        return Orange.multilabel.mulan.trans_mulan_data(xml_name, arff_name, create_on_new)
    else:
        return loadARFF_Weka(filename, create_on_new)
Example No. 52
0
from pyparsing import ParseException, ParserElement, FollowedBy, Suppress, \
    OneOrMore, Word, LineEnd, ZeroOrMore, Optional, Literal, alphas, \
    alphanums, originalTextFor, nestedExpr, quotedString, removeQuotes, \
    lineEnd, empty, col, restOfLine, delimitedList, Each, Keyword, \
    commaSeparatedList, Group

from .errors import FieldParserException, ModelParserException
from .registry import fields_definitions, models_definitions, parsers

ParserElement.defaultWhitespaceChars = (' \r\t')

COMMENT = (Literal("#") + restOfLine + LineEnd()).suppress()

IDENT = Word(alphanums + '_')
DICT_DEF = originalTextFor(nestedExpr('{', '}'))
LIST_DEF = originalTextFor(nestedExpr('[', ']'))
DICT_ACCESS = LIST_ACCESS = originalTextFor(IDENT + nestedExpr('[', ']'))

PYTHON_ALLOWED_EXPR = (DICT_DEF ^ LIST_DEF ^ DICT_ACCESS ^
                       LIST_ACCESS ^ restOfLine
                       ).setParseAction(lambda toks: toks[0])


def indentedBlock(expr, indent_stack, indent=True):
    """Define space-delimited indentation blocks.

    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    There is also a version in pyparsing, but it doesn't seem to work well
    with JSONAlchemy cfg files.
Example No. 53
0
    def get_fragment_grammar(sdkconfig, fragment_file):

        # Match header [mapping]
        header = Suppress("[") + Suppress("mapping") + Suppress("]")

        # There are three possible patterns for mapping entries:
        #       obj:symbol (scheme)
        #       obj (scheme)
        #       * (scheme)
        obj = Fragment.ENTITY.setResultsName("object")
        symbol = Suppress(":") + Fragment.IDENTIFIER.setResultsName("symbol")
        scheme = Suppress("(") + Fragment.IDENTIFIER.setResultsName(
            "scheme") + Suppress(")")

        pattern1 = Group(obj + symbol + scheme)
        pattern2 = Group(obj + scheme)
        pattern3 = Group(
            Literal(Mapping.MAPPING_ALL_OBJECTS).setResultsName("object") +
            scheme)

        mapping_entry = pattern1 | pattern2 | pattern3

        # To simplify parsing, classify groups of condition-mapping entry into two types: normal and default
        # A normal grouping is one with a non-default condition. The default grouping is one which contains the
        # default condition
        mapping_entries = Group(
            ZeroOrMore(mapping_entry)).setResultsName("mappings")

        normal_condition = Suppress(":") + originalTextFor(
            SDKConfig.get_expression_grammar())
        default_condition = Optional(
            Suppress(":") + Literal(DeprecatedMapping.DEFAULT_CONDITION))

        normal_group = Group(
            normal_condition.setResultsName("condition") + mapping_entries)
        default_group = Group(default_condition +
                              mapping_entries).setResultsName("default_group")

        normal_groups = Group(
            ZeroOrMore(normal_group)).setResultsName("normal_groups")

        # Any mapping fragment definition can have zero or more normal group and only one default group as a last entry.
        archive = Suppress("archive") + Suppress(
            ":") + Fragment.ENTITY.setResultsName("archive")
        entries = Suppress("entries") + Suppress(":") + (
            normal_groups + default_group).setResultsName("entries")

        mapping = Group(header + archive + entries)
        mapping.ignore("#" + restOfLine)

        def parsed_deprecated_mapping(pstr, loc, toks):
            fragment = Mapping()
            fragment.archive = toks[0].archive
            fragment.name = re.sub(r"[^0-9a-zA-Z]+", "_", fragment.archive)
            fragment.deprecated = True

            fragment.entries = set()
            condition_true = False
            for entries in toks[0].entries[0]:
                condition = next(iter(entries.condition.asList())).strip()
                condition_val = sdkconfig.evaluate_expression(condition)

                if condition_val:
                    for entry in entries[1]:
                        fragment.entries.add(
                            (entry.object,
                             None if entry.symbol == '' else entry.symbol,
                             entry.scheme))
                    condition_true = True
                    break

            if not fragment.entries and not condition_true:
                try:
                    entries = toks[0].entries[1][1]
                except IndexError:
                    entries = toks[0].entries[1][0]
                for entry in entries:
                    fragment.entries.add(
                        (entry.object,
                         None if entry.symbol == '' else entry.symbol,
                         entry.scheme))

            if not fragment.entries:
                fragment.entries.add(("*", None, "default"))

            dep_warning = str(
                ParseFatalException(
                    pstr, loc,
                    "Warning: Deprecated old-style mapping fragment parsed in file %s."
                    % fragment_file))

            print(dep_warning)
            return fragment

        mapping.setParseAction(parsed_deprecated_mapping)
        return mapping
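
# For reference, an old-style mapping fragment accepted by this grammar looks
# roughly like this (a sketch reconstructed from the grammar above, not taken
# from the project docs):
#
#   [mapping]
#   archive: libfoo.a
#   entries:
#       : CONFIG_FOO_ENABLED
#       obj (noflash)
#       : default
#       * (default)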
Example No. 54
0
 def parse_element(cls, indent_stack):
     return (Keyword("@only_if").suppress() +
             originalTextFor(nestedExpr())
             ).setResultsName("only_if").setParseAction(lambda toks: toks[0])
Example No. 55
0
emoji = Regex(':[\\S]+:').setResultsName('emoji')
message = OneOrMore(Word(alphanums + "#")).setResultsName('message')


def tail(name):
    return Suppress(White(max=1)) + CharsNotIn('').setResultsName(name)

channel_name = Word(alphanums + '-').setResultsName('channel')

user_name = Word(alphanums + '-_.')

link = Word(printables)

int_num = Word(nums)

dumb_single_quotes = QuotedString("‘", endQuoteChar="’", escChar="\\")
dumb_double_quotes = QuotedString("“", endQuoteChar="”", escChar="\\")
quotedString.addParseAction(removeQuotes)
comma_list = delimitedList((dumb_single_quotes | dumb_double_quotes | quotedString
                            | originalTextFor(OneOrMore(Word(printables, excludeChars=","))))).setResultsName('comma_list')
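
# e.g. comma_list.parseString('a, "b c", d').comma_list -> ['a', 'b c', 'd']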


def flag(name):
    dashes = '--' if len(name) > 1 else '-'
    return CaselessLiteral(dashes + name).setResultsName(name)


def flag_with_arg(name, argtype):
    dashes = '--' if len(name) > 1 else '-'
    return CaselessLiteral(dashes + name) + argtype.setResultsName(name)
Example No. 56
0
    def __init__(self):

        # Bibtex keywords

        string_def_start = pp.CaselessKeyword("@string")
        preamble_start = pp.CaselessKeyword("@preamble")
        comment_line_start = pp.CaselessKeyword('@comment')

        # String names
        string_name = pp.Word(pp.alphanums + '_')('StringName')
        self.set_string_name_parse_action(lambda s, l, t: None)
        string_name.addParseAction(self._string_name_parse_action)

        # Values inside bibtex fields
        # Values can be integer or string expressions. The latter may use
        # quoted or braced values.

        # Integer values
        integer = pp.Word(pp.nums)('Integer')

        # Braced values: braced values can contain nested (but balanced) braces
        braced_value_content = pp.CharsNotIn('{}')
        braced_value = pp.Forward()  # Recursive definition for nested braces
        braced_value <<= pp.originalTextFor(
            '{' + pp.ZeroOrMore(braced_value | braced_value_content) + '}'
            )('BracedValue')
        braced_value.setParseAction(remove_braces)
        # TODO add ignore for "\}" and "\{" ?
        # TODO @ are not parsed by bibtex in braces

        # Quoted values: may contain braced content with balanced braces
        brace_in_quoted = pp.nestedExpr('{', '}')
        text_in_quoted = pp.CharsNotIn('"{}')
        # (quotes should be escaped in quoted value)
        quoted_value = pp.originalTextFor(
            '"' +
            pp.ZeroOrMore(text_in_quoted | brace_in_quoted) +
            '"')('QuotedValue')
        quoted_value.addParseAction(pp.removeQuotes)

        # String expressions
        string_expr = pp.delimitedList(
            (quoted_value | braced_value | string_name), delim='#'
            )('StringExpression')
        self.set_string_expression_parse_action(lambda s, l, t: None)
        string_expr.addParseAction(self._string_expr_parse_action)

        value = (integer | string_expr)('Value')

        # Entries

        # @EntryType { ...
        entry_type = (pp.Suppress('@') + pp.Word(pp.alphas))('EntryType')
        entry_type.setParseAction(first_token)

        # Entry key: any character up to a ',' without leading and trailing
        # spaces.
        key = pp.SkipTo(',')('Key')  # Exclude @',\#}{~%
        key.setParseAction(lambda s, l, t: first_token(s, l, t).strip())

        # Field name: word of letters and underscores
        field_name = pp.Word(pp.alphas + '_')('FieldName')
        field_name.setParseAction(first_token)

        # Field: field_name = value
        field = pp.Group(field_name + pp.Suppress('=') + value)('Field')
        field.setParseAction(field_to_pair)

        # List of fields: comma separated fields
        field_list = (pp.delimitedList(field) + pp.Suppress(pp.Optional(','))
                      )('Fields')
        field_list.setParseAction(
            lambda s, l, t: {k: v for (k, v) in reversed(t.get('Fields'))})

        # Entry: type, key, and fields
        self.entry = (entry_type +
                      in_braces_or_pars(key + pp.Suppress(',') + field_list)
                      )('Entry')

        # Other stuff: comments, string definitions, and preamble declarations

        # Explicit comments: @comment + everything up to next valid declaration
        # starting on new line.
        not_an_implicit_comment = (pp.LineStart() + pp.Literal('@')
                                   ) | pp.stringEnd()
        self.explicit_comment = (
            pp.Suppress(comment_line_start) +
            pp.originalTextFor(pp.SkipTo(not_an_implicit_comment),
                               asString=True))('ExplicitComment')
        self.explicit_comment.addParseAction(remove_trailing_newlines)
        self.explicit_comment.addParseAction(remove_braces)
        # The previous implementation included the comment up to the next '}'.
        # That is, however, not in line with bibtex behavior, which is to only
        # ignore until EOL. Brace stripping is arbitrary here but avoids
        # duplication on bibtex write.

        # Empty implicit_comments lead to infinite loop of zeroOrMore
        def mustNotBeEmpty(t):
            if not t[0]:
                raise pp.ParseException("Match must not be empty.")

        # Implicit comments: not anything else
        self.implicit_comment = pp.originalTextFor(
            pp.SkipTo(not_an_implicit_comment).setParseAction(mustNotBeEmpty),
            asString=True)('ImplicitComment')
        self.implicit_comment.addParseAction(remove_trailing_newlines)

        # String definition
        self.string_def = (pp.Suppress(string_def_start) + in_braces_or_pars(
            string_name +
            pp.Suppress('=') +
            string_expr('StringValue')
            ))('StringDefinition')

        # Preamble declaration
        self.preamble_decl = (pp.Suppress(preamble_start) +
                              in_braces_or_pars(value))('PreambleDeclaration')

        # Main bibtex expression

        self.main_expression = pp.ZeroOrMore(
                self.string_def |
                self.preamble_decl |
                self.explicit_comment |
                self.entry |
                self.implicit_comment)
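
        # A hypothetical usage sketch, assuming this __init__ belongs to a
        # class such as `BibtexExpression`:
        #
        #   expr = BibtexExpression()
        #   results = expr.main_expression.parseString(bibtex_source)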
Example No. 57
0
@author: luca

Submitted by Luca DallOlio, September, 2010
(Minor updates by Paul McGuire, June, 2012)
'''
from pyparsing import Word, ZeroOrMore, printables, Suppress, OneOrMore, Group, \
    LineEnd, Optional, White, originalTextFor, hexnums, nums, Combine, Literal, Keyword, \
    cStyleComment, Regex, Forward, MatchFirst, And, srange, oneOf, alphas, alphanums, \
    delimitedList

# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g

# Tokens
EOL = Suppress(LineEnd()) # $
singleTextString = originalTextFor(ZeroOrMore(~EOL + (White(" \t") | Word(printables)))).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
ESC = Literal('\\') + (oneOf(list(r'nrtbf\">'+"'")) | ('u' + Word(hexnums, exact=4)) | Word(printables, exact=1))
LITERAL_CHAR = ESC | ~(Literal("'") | Literal('\\')) + Word(printables, exact=1)
CHAR_LITERAL = Suppress("'") + LITERAL_CHAR + Suppress("'")
STRING_LITERAL = Suppress("'") + Combine(OneOrMore(LITERAL_CHAR)) + Suppress("'") 
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
DOUBLE_ANGLE_STRING_LITERAL = '<<' + ZeroOrMore(Word(printables, exact=1)) + '>>'
TOKEN_REF = Word(alphas.upper(), alphanums+'_')
RULE_REF = Word(alphas.lower(), alphanums+'_')
ACTION_ESC = (Suppress("\\") + Suppress("'")) | Suppress('\\"') | Suppress('\\') + (~(Literal("'") | Literal('"')) + Word(printables, exact=1))
ACTION_CHAR_LITERAL = Suppress("'") + (ACTION_ESC | ~(Literal('\\') | Literal("'")) + Word(printables, exact=1)) + Suppress("'")
ACTION_STRING_LITERAL = Suppress('"') + ZeroOrMore(ACTION_ESC | ~(Literal('\\') | Literal('"')) + Word(printables, exact=1)) + Suppress('"') 
SRC = Suppress('src') + ACTION_STRING_LITERAL("file") + INT("line")
id = TOKEN_REF | RULE_REF
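
# e.g. CHAR_LITERAL.parseString("'a'")    -> ['a']
#      STRING_LITERAL.parseString("'ab'") -> ['ab']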
Example No. 58
0
URI = Regex(r'[^ ]+')("url")
URL = (AT + URI)

EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")

VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)

VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
                       joinString=",", adjacent=False)("_raw_spec")
_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '')

VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
VERSION_SPEC.setParseAction(lambda s, l, t: t[1])

MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
MARKER_EXPR.setParseAction(
    lambda s, l, t: Marker(s[t._original_start:t._original_end])
)
MARKER_SEPERATOR = SEMICOLON
MARKER = MARKER_SEPERATOR + MARKER_EXPR

VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
URL_AND_MARKER = URL + Optional(MARKER)

NAMED_REQUIREMENT = \
    NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
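
# e.g. NAMED_REQUIREMENT matches requirement strings such as:
#   name[extra1,extra2]>=1.0,<2.0; python_version < "2.7"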
Example No. 59
0
def __parse_variable(to_parse):
    return (
        pp.originalTextFor(parse.VARIABLE)('result')
        .leaveWhitespace()
        .parseWithTabs()
        .parseString(to_parse))['result']
Example No. 60
0
notDigLower = notDigname.copy().setParseAction(lambda t: t[0].lower())

macroDef = notDigLower.copy()

macroRef = notDigLower.copy().setParseAction(MacroReference.fromParseResult)
fieldName = notDigLower.copy()
entryType = notDigLower.setResultsName("entry type")
citeKey = anyName.setResultsName("cite key")
string = number | macroRef | quotedString | curlyString

# There can be hash concatenation
fieldValue = string + ZeroOrMore(HASH + string)

namePart = Regex(r"(?!\band\b)[^\s\.,{}]+\.?") | curlyString
nobility = Regex(r"[a-z]\w+\.?(\s[a-z]\w+\.?)*").setResultsName("nobility")  # "van" etc.
spacedNames = originalTextFor(OneOrMore(namePart))
firstNames = spacedNames.copy().setResultsName("firstname")
lastNames = spacedNames.copy().setResultsName("lastname")
nameSuffix = namePart.copy().setResultsName("suffix")

# a name in "comma separated" style, like "Helmling, Michael"
csName = Optional(nobility) + lastNames + COMMA + Optional(nameSuffix + COMMA) + firstNames


def labelLiteralName(toks):
    """In case of a literal name, we cannot distinguish between first and middle names, or
    recognize multi-part last names. Hence it is assumed that the last part is the last name,
    anything else is stored as first names.
    """
    toks["lastname"] = toks[-1]
    if len(toks) > 1: